diff options
Diffstat (limited to 'include/linux')
99 files changed, 3579 insertions, 3302 deletions
diff --git a/include/linux/average.h b/include/linux/average.h index d04aa58280de..7ddaf340d2ac 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -1,45 +1,66 @@ #ifndef _LINUX_AVERAGE_H #define _LINUX_AVERAGE_H -/* Exponentially weighted moving average (EWMA) */ +/* + * Exponentially weighted moving average (EWMA) + * + * This implements a fixed-precision EWMA algorithm, with both the + * precision and fall-off coefficient determined at compile-time + * and built into the generated helper funtions. + * + * The first argument to the macro is the name that will be used + * for the struct and helper functions. + * + * The second argument, the precision, expresses how many bits are + * used for the fractional part of the fixed-precision values. + * + * The third argument, the weight reciprocal, determines how the + * new values will be weighed vs. the old state, new values will + * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note + * that this parameter must be a power of two for efficiency. + */ -#define DECLARE_EWMA(name, _factor, _weight) \ +#define DECLARE_EWMA(name, _precision, _weight_rcp) \ struct ewma_##name { \ unsigned long internal; \ }; \ static inline void ewma_##name##_init(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + /* \ + * Even if you want to feed it just 0/1 you should have \ + * some bits for the non-fractional part... \ + */ \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ e->internal = 0; \ } \ static inline unsigned long \ ewma_##name##_read(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ - return e->internal >> ilog2(_factor); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ + return e->internal >> (_precision); \ } \ static inline void ewma_##name##_add(struct ewma_##name *e, \ unsigned long val) \ { \ unsigned long internal = ACCESS_ONCE(e->internal); \ - unsigned long weight = ilog2(_weight); \ - unsigned long factor = ilog2(_factor); \ + unsigned long weight_rcp = ilog2(_weight_rcp); \ + unsigned long precision = _precision; \ \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ \ ACCESS_ONCE(e->internal) = internal ? \ - (((internal << weight) - internal) + \ - (val << factor)) >> weight : \ - (val << factor); \ + (((internal << weight_rcp) - internal) + \ + (val << precision)) >> weight_rcp : \ + (val << precision); \ } #endif /* _LINUX_AVERAGE_H */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1303b570b18c..05488da3aee9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,6 +6,8 @@ #include <asm/exec.h> #include <uapi/linux/binfmts.h> +struct filename; + #define CORENAME_MAX_SIZE 128 /* @@ -123,4 +125,12 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); +extern int do_execve(struct filename *, + const char __user * const __user *, + const char __user * const __user *); +extern int do_execveat(int, struct filename *, + const char __user * const __user *, + const char __user * const __user *, + int); + #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 000000000000..b1ef6e14744f --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 001d30d727c5..b296a9006117 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,9 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_mq_freeze_queue_wait(struct request_queue *q); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9ca..5a7da607ca04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include <linux/sched.h> +#include <linux/sched/clock.h> #ifdef CONFIG_BLOCK @@ -434,7 +435,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 1816c5e26581..88cd5dc8e238 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -48,6 +48,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long osd_request_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -68,6 +69,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ea0c282f3dc..c125b5d9e13c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -189,6 +189,7 @@ struct ceph_osd_request { /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ + unsigned long r_start_stamp; /* jiffies */ int r_attempts; struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3c02404cfce9..6a3f850cabab 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -531,8 +531,8 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. + * Allows cgroup operations to synchronize against threadgroup changes + * using a percpu_rw_semaphore. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { @@ -543,8 +543,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). + * Counterpart of cgroup_threadcgroup_change_begin(). */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { @@ -555,7 +554,11 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #define CGROUP_SUBSYS_COUNT 0 -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + might_sleep(); +} + static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 21f9c74496e7..f92081234afd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -30,6 +30,8 @@ struct cpu { extern void boot_cpu_init(void); extern void boot_cpu_state_init(void); +extern void cpu_init(void); +extern void trap_init(void); extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bb790c4db0c5..62d240e962f0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338..611fce58d670 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -9,6 +9,8 @@ */ #include <linux/sched.h> +#include <linux/sched/topology.h> +#include <linux/sched/task.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/mm.h> diff --git a/include/linux/cputime.h b/include/linux/cputime.h deleted file mode 100644 index a691dc4ddc13..000000000000 --- a/include/linux/cputime.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __LINUX_CPUTIME_H -#define __LINUX_CPUTIME_H - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include <asm/cputime.h> - -#ifndef cputime_to_nsecs -# define cputime_to_nsecs(__ct) \ - (cputime_to_usecs(__ct) * NSEC_PER_USEC) -#endif - -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3ac..b03e7d049a64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,8 +18,9 @@ #include <linux/selinux.h> #include <linux/atomic.h> #include <linux/uidgid.h> +#include <linux/sched.h> +#include <linux/sched/user.h> -struct user_struct; struct cred; struct inode; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 591b6c16f9c1..d2e38dc6172c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ #include <linux/rcupdate.h> #include <linux/lockref.h> #include <linux/stringhash.h> +#include <linux/wait.h> struct path; struct vfsmount; diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 00e60f79a9cc..4178d2493547 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,8 +18,6 @@ #define _LINUX_DELAYACCT_H #include <uapi/linux/taskstats.h> -#include <linux/sched.h> -#include <linux/slab.h> /* * Per-task flags relevant to delay accounting @@ -30,7 +28,43 @@ #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT +struct task_delay_info { + spinlock_t lock; + unsigned int flags; /* Private per-task flags */ + + /* For each stat XXX, add following, aligned appropriately + * + * struct timespec XXX_start, XXX_end; + * u64 XXX_delay; + * u32 XXX_count; + * + * Atomicity of updates to XXX_delay, XXX_count protected by + * single lock above (split into XXX_lock if contention is an issue). + */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ + + u64 freepages_start; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ +}; +#endif +#include <linux/sched.h> +#include <linux/slab.h> + +#ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e9bc9292bd3a..e8ffba1052d3 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,7 +26,7 @@ #include <linux/msi.h> #include <linux/irqreturn.h> #include <linux/rwsem.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> struct acpi_dmar_header; diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3..c8240a12c42d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -3,6 +3,8 @@ #include <linux/user.h> #include <linux/bug.h> +#include <linux/sched/task_stack.h> + #include <asm/elf.h> #include <uapi/linux/elfcore.h> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f4956d8601c..728d4e0292aa 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -61,6 +61,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ +struct kmem_cache; + #ifdef CONFIG_FAILSLAB extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else diff --git a/include/linux/fs.h b/include/linux/fs.h index c64f2cb7d364..7251f7bb45e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1567,6 +1567,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. */ @@ -1706,7 +1709,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -1718,6 +1721,29 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, @@ -1744,19 +1770,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2568,6 +2581,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. @@ -2652,7 +2678,7 @@ static const char * const kernel_read_file_str[] = { static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { - if (id < 0 || id >= READING_MAX_ID) + if ((unsigned)id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; return kernel_read_file_str[id]; @@ -2876,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); extern void generic_fillattr(struct inode *, struct kstat *); -int vfs_getattr_nosec(struct path *path, struct kstat *stat); -extern int vfs_getattr(struct path *, struct kstat *); +extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); +extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); @@ -2890,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_stat(const char __user *, struct kstat *); -extern int vfs_lstat(const char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); -extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); + +static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + stat, STATX_BASIC_STATS); +} +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); +} +static inline int vfs_fstat(int fd, struct kstat *stat) +{ + return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); +} + + extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); @@ -2923,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a999d281a2f1..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,13 +167,6 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct disk_devt { - atomic_t count; - void (*release)(struct disk_devt *disk_devt); -}; - -void put_disk_devt(struct disk_devt *disk_devt); -void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -183,7 +176,6 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e52b427223ba..249e579ecd4c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -19,7 +19,6 @@ #include <linux/ktime.h> #include <linux/init.h> #include <linux/list.h> -#include <linux/wait.h> #include <linux/percpu.h> #include <linux/timer.h> #include <linux/timerqueue.h> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada..b857fc8cc2ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index bed8fbb45f31..6b183521c616 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -30,6 +30,7 @@ #include <linux/device.h> /* for struct device */ #include <linux/sched.h> /* for completion */ #include <linux/mutex.h> +#include <linux/rtmutex.h> #include <linux/irqdomain.h> /* for Host Notify IRQ */ #include <linux/of.h> /* for struct device_node */ #include <linux/swab.h> /* for swab16 */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a85d61f7614..91d9049f0039 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,8 +12,10 @@ #include <linux/securebits.h> #include <linux/seqlock.h> #include <linux/rbtree.h> +#include <linux/sched/autogroup.h> #include <net/net_namespace.h> #include <linux/sched/rt.h> +#include <linux/mm_types.h> #include <asm/thread_info.h> @@ -149,8 +151,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -extern struct task_group root_task_group; - #ifdef CONFIG_CGROUP_SCHED # define INIT_CGROUP_SCHED(tsk) \ .sched_task_group = &root_task_group, diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 672cfef72fc8..97cbca19430d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -373,6 +373,8 @@ #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) #define ICC_IGRPEN1_EL1_SHIFT 0 #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) /* diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813e..9f3616085423 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8e06d758ee48..2afd74b9d844 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod @@ -166,10 +173,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a5..1c823bef4c15 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,7 +1,6 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H -#include <linux/sched.h> #include <linux/types.h> struct kmem_cache; @@ -19,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); diff --git a/include/linux/key.h b/include/linux/key.h index 722914798f37..e45212f2777e 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -354,7 +354,10 @@ static inline bool key_is_instantiated(const struct key *key) !test_bit(KEY_FLAG_NEGATIVE, &key->flags); } -#define rcu_dereference_key(KEY) \ +#define dereference_key_rcu(KEY) \ + (rcu_dereference((KEY)->payload.rcu_data0)) + +#define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1e032a1ddb3e..5d9a400af509 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -1,7 +1,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include <linux/sched.h> /* MMF_VM_HUGEPAGE */ +#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 481c8c4627ca..e1cfda4bee58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -12,6 +12,7 @@ #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/sched.h> +#include <linux/sched/coredump.h> struct stable_node; struct mem_cgroup; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d69d5150748..2c14ad9809da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -26,6 +26,7 @@ #include <linux/context_tracking.h> #include <linux/irqbypass.h> #include <linux/swait.h> +#include <linux/refcount.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -401,7 +402,7 @@ struct kvm { #endif struct kvm_vm_stat stat; struct kvm_arch arch; - atomic_t users_count; + refcount_t users_count; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 8458c5351e56..77e7af32543f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -70,6 +70,8 @@ struct nd_cmd_desc { struct nd_interleave_set { u64 cookie; + /* compatibility with initial buggy Linux implementation */ + u64 altcookie; }; struct nd_mapping_desc { diff --git a/include/linux/log2.h b/include/linux/log2.h index ef3d4f67118c..c373295f359f 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -16,12 +16,6 @@ #include <linux/bitops.h> /* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - -/* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index e965e5090d96..a858bcb6220b 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) int i; for (i = ETH_ALEN; i > 0; i--) { - addr[i - 1] = mac && 0xFF; + addr[i - 1] = mac & 0xFF; mac >>= 8; } } diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f4..5f01c88f0800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1619,11 +1629,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) + +#ifndef __ARCH_HAS_5LEVEL_HACK +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { @@ -2385,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4f6d440ad785..f60f45fe226f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1,9 +1,9 @@ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H +#include <linux/mm_types_task.h> + #include <linux/auxvec.h> -#include <linux/types.h> -#include <linux/threads.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> @@ -13,7 +13,7 @@ #include <linux/uprobes.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> -#include <asm/page.h> + #include <asm/mmu.h> #ifndef AT_VECTOR_SIZE_ARCH @@ -24,11 +24,6 @@ struct address_space; struct mem_cgroup; -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) -#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -231,17 +226,6 @@ struct page { #endif ; -struct page_frag { - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 offset; - __u32 size; -#else - __u16 offset; - __u16 size; -#endif -}; - #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) @@ -371,27 +355,6 @@ struct core_state { struct completion startup; }; -enum { - MM_FILEPAGES, /* Resident file mapping pages */ - MM_ANONPAGES, /* Resident anonymous pages */ - MM_SWAPENTS, /* Anonymous swap entries */ - MM_SHMEMPAGES, /* Resident shared memory pages */ - NR_MM_COUNTERS -}; - -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct kioctx_table; struct mm_struct { struct vm_area_struct *mmap; /* list of VMAs */ @@ -534,6 +497,8 @@ struct mm_struct { struct work_struct async_put_work; }; +extern struct mm_struct init_mm; + static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h new file mode 100644 index 000000000000..136dfdf63ba1 --- /dev/null +++ b/include/linux/mm_types_task.h @@ -0,0 +1,87 @@ +#ifndef _LINUX_MM_TYPES_TASK_H +#define _LINUX_MM_TYPES_TASK_H + +/* + * Here are the definitions of the MM data types that are embedded in 'struct task_struct'. + * + * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) + */ + +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/atomic.h> +#include <linux/cpumask.h> + +#include <asm/page.h> + +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) +#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) + +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + +enum { + MM_FILEPAGES, /* Resident file mapping pages */ + MM_ANONPAGES, /* Resident anonymous pages */ + MM_SWAPENTS, /* Anonymous swap entries */ + MM_SHMEMPAGES, /* Resident shared memory pages */ + NR_MM_COUNTERS +}; + +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) +#define SPLIT_RSS_COUNTING +/* per-thread cached information, */ +struct task_rss_stat { + int events; /* for synchronization threshold */ + int count[NR_MM_COUNTERS]; +}; +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +struct mm_rss_stat { + atomic_long_t count[NR_MM_COUNTERS]; +}; + +struct page_frag { + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 offset; + __u32 size; +#else + __u16 offset; + __u16 size; +#endif +}; + +/* Track pages that require TLB flushes */ +struct tlbflush_unmap_batch { +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed. See set_tlb_ubc_flush_pending(). + */ + struct cpumask cpumask; + + /* True if any bit in cpumask is set */ + bool flush_required; + + /* + * If true then the PTE was dirty when unmapped. The entry must be + * flushed before IO is initiated or a stale TLB entry potentially + * allows an update without redirtying the page. + */ + bool writable; +#endif +}; + +#endif /* _LINUX_MM_TYPES_TASK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f40f0ab3847a..97456b2539e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -330,6 +330,7 @@ struct napi_struct { enum { NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ @@ -338,12 +339,13 @@ enum { }; enum { - NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), - NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } -/** - * napi_schedule_prep - check if NAPI can be scheduled - * @n: NAPI context - * - * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable to - * insure only one NAPI poll instance runs. We also make - * sure there is no pending NAPI disable. - */ -static inline bool napi_schedule_prep(struct napi_struct *n) -{ - return !napi_disable_pending(n) && - !test_and_set_bit(NAPI_STATE_SCHED, &n->state); -} +bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f1da8c8dd473..287f34161086 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0a3fadc32693..aa3cd0878270 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,43 @@ #include <linux/sched.h> #include <asm/irq.h> +#ifdef CONFIG_LOCKUP_DETECTOR +extern void touch_softlockup_watchdog_sched(void); +extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); +extern void touch_all_softlockup_watchdogs(void); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; +void lockup_detector_init(void); +#else +static inline void touch_softlockup_watchdog_sched(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +static inline void touch_softlockup_watchdog_sync(void) +{ +} +static inline void touch_all_softlockup_watchdogs(void) +{ +} +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc87..8a266e2be5a6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/types.h> #include <linux/nodemask.h> #include <uapi/linux/oom.h> diff --git a/include/linux/pci.h b/include/linux/pci.h index 282ed32244ce..eb3da1a04e6c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + return first_online_node; +} #endif static inline int diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a5f98d53d732..9b7dd59fe28d 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +#include <linux/sched/task_stack.h> + struct perf_regs { __u64 abi; struct pt_regs *regs; diff --git a/include/linux/pid.h b/include/linux/pid.h index 298ead5512e5..4d179316e431 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,7 +1,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H -#include <linux/rcupdate.h> +#include <linux/rculist.h> enum pid_type { diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index d4d34791e463..032b55909145 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -146,8 +146,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier); -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, @@ -172,6 +170,12 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } + +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +{ + return IS_ERR_OR_NULL(dev->power.qos) ? + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); +} #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) @@ -199,12 +203,6 @@ static inline int dev_pm_qos_add_notifier(struct device *dev, static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { return 0; } -static inline int dev_pm_qos_add_global_notifier( - struct notifier_block *notifier) - { return 0; } -static inline int dev_pm_qos_remove_global_notifier( - struct notifier_block *notifier) - { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; @@ -236,6 +234,7 @@ static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; } #endif #endif diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7eeceac52dea..cae461224948 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -55,6 +55,27 @@ /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + +/* + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. + * + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). + */ +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET + +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab9..422bc2e4cb6a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include <linux/compiler.h> /* For unlikely. */ #include <linux/sched.h> /* For struct task_struct. */ +#include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */ #include <linux/err.h> /* for IS_ERR_VALUE */ #include <linux/bug.h> /* For BUG_ON. */ #include <linux/pid_namespace.h> /* For task_active_pid_ns. */ diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h new file mode 100644 index 000000000000..d60d4e278609 --- /dev/null +++ b/include/linux/purgatory.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_PURGATORY_H +#define _LINUX_PURGATORY_H + +#include <linux/types.h> +#include <crypto/sha.h> +#include <uapi/linux/kexec.h> + +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +#endif diff --git a/include/linux/random.h b/include/linux/random.h index 7bd2403e4fef..ed5c3838780d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,14 +37,26 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; #endif -unsigned int get_random_int(void); -unsigned long get_random_long(void); +u32 get_random_u32(void); +u64 get_random_u64(void); +static inline unsigned int get_random_int(void) +{ + return get_random_u32(); +} +static inline unsigned long get_random_long(void) +{ +#if BITS_PER_LONG == 64 + return get_random_u64(); +#else + return get_random_u32(); +#endif +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ade6a52d9d4..de88b33c0974 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include <linux/cpumask.h> #include <linux/seqlock.h> #include <linux/lockdep.h> -#include <linux/completion.h> #include <linux/debugobjects.h> #include <linux/bug.h> #include <linux/compiler.h> @@ -226,45 +225,6 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); -/* - * Structure allowing asynchronous waiting on RCU. - */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; -void wakeme_after_rcu(struct rcu_head *head); - -void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array); - -#define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ - __crcu_array, __rs_array); \ -} while (0) - -#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) - -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for the flavors to wait on. - * - * This macro waits concurrently for multiple flavors of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait - * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU - * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called - * is automatically a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h new file mode 100644 index 000000000000..e774b4f5f220 --- /dev/null +++ b/include/linux/rcupdate_wait.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_RCUPDATE_WAIT_H +#define _LINUX_SCHED_RCUPDATE_WAIT_H + +/* + * RCU synchronization types and methods: + */ + +#include <linux/rcupdate.h> +#include <linux/completion.h> + +/* + * Structure allowing asynchronous waiting on RCU. + */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array); + +#define _wait_rcu_gp(checktiny, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ +} while (0) + +#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) + +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for the flavors to wait on. + * + * This macro waits concurrently for multiple flavors of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait + * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, supplying the corresponding srcu_struct. + * + * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU + * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called + * is automatically a grace period. + */ +#define synchronize_rcu_mult(...) \ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + +#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4f9b2fa2173d..b452953e21c8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -53,15 +53,8 @@ static inline void cond_synchronize_sched(unsigned long oldstate) might_sleep(); } -static inline void rcu_barrier_bh(void) -{ - wait_rcu_gp(call_rcu_bh); -} - -static inline void rcu_barrier_sched(void) -{ - wait_rcu_gp(call_rcu_sched); -} +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); static inline void synchronize_rcu_expedited(void) { diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..c9f795e9a2ee 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..092292b6675e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -25,7 +25,7 @@ #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/mutex.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> /* * The end of the chain is marked with a special nulls marks which has diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a28deb5f210..d67eee84fd43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,197 +1,57 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H -#include <uapi/linux/sched.h> - -#include <linux/sched/prio.h> - - -struct sched_param { - int sched_priority; -}; - -#include <asm/param.h> /* for HZ */ +/* + * Define 'struct task_struct' and provide the main scheduler + * APIs (schedule(), wakeup variants, etc.) + */ -#include <linux/capability.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/timex.h> -#include <linux/jiffies.h> -#include <linux/plist.h> -#include <linux/rbtree.h> -#include <linux/thread_info.h> -#include <linux/cpumask.h> -#include <linux/errno.h> -#include <linux/nodemask.h> -#include <linux/mm_types.h> -#include <linux/preempt.h> +#include <uapi/linux/sched.h> -#include <asm/page.h> -#include <asm/ptrace.h> +#include <asm/current.h> -#include <linux/smp.h> +#include <linux/pid.h> #include <linux/sem.h> #include <linux/shm.h> -#include <linux/signal.h> -#include <linux/compiler.h> -#include <linux/completion.h> -#include <linux/pid.h> -#include <linux/percpu.h> -#include <linux/topology.h> +#include <linux/kcov.h> +#include <linux/mutex.h> +#include <linux/plist.h> +#include <linux/hrtimer.h> #include <linux/seccomp.h> +#include <linux/nodemask.h> #include <linux/rcupdate.h> -#include <linux/rculist.h> -#include <linux/rtmutex.h> - -#include <linux/time.h> -#include <linux/param.h> #include <linux/resource.h> -#include <linux/timer.h> -#include <linux/hrtimer.h> -#include <linux/kcov.h> -#include <linux/task_io_accounting.h> #include <linux/latencytop.h> -#include <linux/cred.h> -#include <linux/llist.h> -#include <linux/uidgid.h> -#include <linux/gfp.h> -#include <linux/magic.h> -#include <linux/cgroup-defs.h> - -#include <asm/processor.h> - -#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - -/* - * Extended scheduling parameters data structure. - * - * This is needed because the original struct sched_param can not be - * altered without introducing ABI issues with legacy applications - * (e.g., in sched_getparam()). - * - * However, the possibility of specifying more than just a priority for - * the tasks may be useful for a wide variety of application fields, e.g., - * multimedia, streaming, automation and control, and many others. - * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: - * - the activation period or minimum instance inter-arrival time; - * - the maximum (or average, depending on the actual scheduling - * discipline) computation time of all instances, a.k.a. runtime; - * - the deadline (relative to the actual activation time) of each - * instance. - * Very briefly, a periodic (sporadic) task asks for the execution of - * some specific computation --which is typically called an instance-- - * (at most) every period. Moreover, each instance typically lasts no more - * than the runtime and must be completed by time instant t equal to - * the instance activation time + the deadline. - * - * This is reflected by the actual fields of the sched_attr structure: - * - * @size size of the structure, for fwd/bwd compat. - * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period - * - * Given this task model, there are a multiplicity of scheduling algorithms - * and policies, that can be used to ensure all the tasks will make their - * timing constraints. - * - * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the - * only user of this new interface. More information about the algorithm - * available in the scheduling class file or in Documentation/. - */ -struct sched_attr { - u32 size; - - u32 sched_policy; - u64 sched_flags; - - /* SCHED_NORMAL, SCHED_BATCH */ - s32 sched_nice; - - /* SCHED_FIFO, SCHED_RR */ - u32 sched_priority; - - /* SCHED_DEADLINE */ - u64 sched_runtime; - u64 sched_deadline; - u64 sched_period; -}; +#include <linux/sched/prio.h> +#include <linux/signal_types.h> +#include <linux/mm_types_task.h> +#include <linux/task_io_accounting.h> -struct futex_pi_state; -struct robust_list_head; +/* task_struct member predeclarations (sorted alphabetically): */ +struct audit_context; +struct backing_dev_info; struct bio_list; -struct fs_struct; -struct perf_event_context; struct blk_plug; -struct filename; +struct cfs_rq; +struct fs_struct; +struct futex_pi_state; +struct io_context; +struct mempolicy; struct nameidata; - -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ -extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ -#define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ -#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ -#define EXP_5 2014 /* 1/exp(5sec/5min) */ -#define EXP_15 2037 /* 1/exp(5sec/15min) */ - -#define CALC_LOAD(load,exp,n) \ - load *= exp; \ - load += n*(FIXED_1-exp); \ - load >>= FSHIFT; - -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); - -extern void calc_global_load(unsigned long ticks); - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - -extern void dump_cpu_task(int cpu); - +struct nsproxy; +struct perf_event_context; +struct pid_namespace; +struct pipe_inode_info; +struct rcu_node; +struct reclaim_state; +struct robust_list_head; +struct sched_attr; +struct sched_param; struct seq_file; -struct cfs_rq; +struct sighand_struct; +struct signal_struct; +struct task_delay_info; struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -#endif /* * Task state bitmask. NOTE! These bits are also @@ -203,53 +63,53 @@ extern void proc_sched_set_task(struct task_struct *p); * modifying one set can't modify the other one by * mistake. */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 -/* in tsk->exit_state */ -#define EXIT_DEAD 16 -#define EXIT_ZOMBIE 32 -#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 -#define TASK_WAKING 256 -#define TASK_PARKED 512 -#define TASK_NOLOAD 1024 -#define TASK_NEW 2048 -#define TASK_STATE_MAX 4096 - -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - -extern char ___assert_task_state[1 - 2*!!( - sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; - -/* Convenience macros for the sake of set_current_state */ -#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) - -#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) - -/* Convenience macros for the sake of wake_up */ -#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) - -/* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) - -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) \ - ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) + +/* Used in tsk->state: */ +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* Used in tsk->exit_state: */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* Used in tsk->state again: */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 + +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" + +/* Convenience macros for the sake of set_current_state: */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up(): */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +/* get_task_state(): */ +#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + +#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) + +#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) + +#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) + +#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -299,139 +159,24 @@ extern char ___assert_task_state[1 - 2*!!( * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - +#define __set_current_state(state_value) do { current->state = (state_value); } while (0) +#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) #endif -/* Task command name length */ -#define TASK_COMM_LEN 16 - -#include <linux/spinlock.h> - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - -struct task_struct; - -#ifdef CONFIG_PROVE_RCU -extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ - -extern void sched_init(void); -extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); - -extern cpumask_var_t cpu_isolated_map; - -extern int runqueue_is_locked(int cpu); - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); -#else -static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } -#endif - -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} +/* Task command name length: */ +#define TASK_COMM_LEN 16 -extern void show_regs(struct pt_regs *); +extern cpumask_var_t cpu_isolated_map; -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - -extern void cpu_init (void); -extern void trap_init(void); -extern void update_process_times(int user); extern void scheduler_tick(void); -extern int sched_cpu_starting(unsigned int cpu); -extern int sched_cpu_activate(unsigned int cpu); -extern int sched_cpu_deactivate(unsigned int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -extern int sched_cpu_dying(unsigned int cpu); -#else -# define sched_cpu_dying NULL -#endif - -extern void sched_show_task(struct task_struct *p); - -#ifdef CONFIG_LOCKUP_DETECTOR -extern void touch_softlockup_watchdog_sched(void); -extern void touch_softlockup_watchdog(void); -extern void touch_softlockup_watchdog_sync(void); -extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); -#else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -static inline void lockup_detector_init(void) -{ -} -#endif - -#ifdef CONFIG_DETECT_HUNG_TASK -void reset_hung_task_detector(void); -#else -static inline void reset_hung_task_detector(void) -{ -} -#endif - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; +#define MAX_SCHEDULE_TIMEOUT LONG_MAX -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); - -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long schedule_timeout(signed long timeout); -extern signed long schedule_timeout_interruptible(signed long timeout); -extern signed long schedule_timeout_killable(signed long timeout); -extern signed long schedule_timeout_uninterruptible(signed long timeout); -extern signed long schedule_timeout_idle(signed long timeout); +extern long schedule_timeout(long timeout); +extern long schedule_timeout_interruptible(long timeout); +extern long schedule_timeout_killable(long timeout); +extern long schedule_timeout_uninterruptible(long timeout); +extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); @@ -440,112 +185,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -void __noreturn do_task_dead(void); - -struct nsproxy; -struct user_namespace; - -#ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} -#endif - -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - -extern void set_dumpable(struct mm_struct *mm, int value); -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. - */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) - -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - u64 ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -struct cpu_itimer { - u64 expires; - u64 incr; -}; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -557,20 +196,12 @@ struct cpu_itimer { */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - u64 utime; - u64 stime; - raw_spinlock_t lock; + u64 utime; + u64 stime; + raw_spinlock_t lock; #endif }; -static inline void prev_cputime_init(struct prev_cputime *prev) -{ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - prev->utime = prev->stime = 0; - raw_spin_lock_init(&prev->lock); -#endif -} - /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in nanoseconds @@ -582,376 +213,35 @@ static inline void prev_cputime_init(struct prev_cputime *prev) * these counts together and treat all three of them in parallel. */ struct task_cputime { - u64 utime; - u64 stime; - unsigned long long sum_exec_runtime; + u64 utime; + u64 stime; + unsigned long long sum_exec_runtime; }; -/* Alternate field names when used to cache expirations. */ -#define virt_exp utime -#define prof_exp stime -#define sched_exp sum_exec_runtime - -/* - * This is the atomic variant of task_cputime, which can be used for - * storing and updating task_cputime statistics without locking. - */ -struct task_cputime_atomic { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; -}; - -#define INIT_CPUTIME_ATOMIC \ - (struct task_cputime_atomic) { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - } - -#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/* - * Disable preemption until the scheduler is running -- use an unconditional - * value so that it also works on !PREEMPT_COUNT kernels. - * - * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). - */ -#define INIT_PREEMPT_COUNT PREEMPT_OFFSET - -/* - * Initial preempt_count value; reflects the preempt_count schedule invariant - * which states that during context switches: - * - * preempt_count() == 2*PREEMPT_DISABLE_OFFSET - * - * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. - * Note: See finish_task_switch(). - */ -#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/** - * struct thread_group_cputimer - thread group interval timer counts - * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. - * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. - * - * This structure contains the version of task_cputime, above, that is - * used for thread group CPU timer calculations. - */ -struct thread_group_cputimer { - struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; -}; - -#include <linux/rwsem.h> -struct autogroup; - -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; - - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; - - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; - - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - - struct list_head cpu_timers[3]; - -#endif - - struct pid *leader_pid; - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - - struct pid *tty_old_pgrp; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. - */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; - - /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. - */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif - - /* - * Thread is the potential origin of an oom condition; kill first on - * oom - */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ -}; - -/* - * Bits in flags field of signal_struct. - */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) - -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif -#ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -#endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ - unsigned long unix_inflight; /* How many files in flight in unix sockets */ - atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ - -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - kuid_t uid; - -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) - atomic_long_t locked_vm; -#endif -}; - -extern int uids_sysfs_init(void); - -extern struct user_struct *find_user(kuid_t); - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - - -struct backing_dev_info; -struct reclaim_state; +/* Alternate field names when used on cache expirations: */ +#define virt_exp utime +#define prof_exp stime +#define sched_exp sum_exec_runtime -#ifdef CONFIG_SCHED_INFO struct sched_info { - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long run_delay; /* time spent waiting on a runqueue */ +#ifdef CONFIG_SCHED_INFO + /* Cumulative counters: */ - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ -}; -#endif /* CONFIG_SCHED_INFO */ + /* # of times we have run on this CPU: */ + unsigned long pcount; -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ + /* Time spent waiting on a runqueue: */ + unsigned long long run_delay; - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ + /* Timestamps: */ - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. - */ - u64 blkio_start; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - u64 freepages_start; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ + /* When did we last run on a CPU? */ + unsigned long long last_arrival; -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif -} - -#ifdef CONFIG_SCHEDSTATS -void force_schedstat_enabled(void); -#endif + /* When were we last queued to run? */ + unsigned long long last_queued; -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES +#endif /* CONFIG_SCHED_INFO */ }; /* @@ -961,290 +251,12 @@ enum cpu_idle_type { * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ -# define SCHED_FIXEDPOINT_SHIFT 10 -# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) - -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - -/* - * Wake-queues are lists of tasks with a pending wakeup, whose - * callers have already marked the task as woken internally, - * and can thus carry on. A common use case is being able to - * do the wakeups once the corresponding user lock as been - * released. - * - * We hold reference to each task in the list across the wakeup, - * thus guaranteeing that the memory is still valid by the time - * the actual wakeups are performed in wake_up_q(). - * - * One per task suffices, because there's never a need for a task to be - * in two wake queues simultaneously; it is forbidden to abandon a task - * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is - * already in a wake queue, the wakeup will happen soon and the second - * waker can just skip it. - * - * The DEFINE_WAKE_Q macro declares and initializes the list head. - * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function. Otherwise, the list can be - * re-initialized for later re-use by wake_q_init(). - * - * Note that this can cause spurious wakeups. schedule() callers - * must ensure the call is done inside a loop, confirming that the - * wakeup condition has in fact occurred. - */ -struct wake_q_node { - struct wake_q_node *next; -}; - -struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -}; - -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) - -#define DEFINE_WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } - -static inline void wake_q_init(struct wake_q_head *head) -{ - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -} - -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); - -/* - * sched-domains (multiprocessor balancing) declarations: - */ -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ -#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ -#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x4000 /* cross-node balancing */ - -#ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif - -extern int arch_asym_cpu_priority(int cpu); - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -extern int sched_domain_level_max; - -struct sched_group; - -struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -}; - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - unsigned int smt_gain; - - int nohz_idle; /* NOHZ IDLE status */ - int flags; /* See SD_* */ - int level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. */ - unsigned int nr_balance_failed; /* initialise to 0 */ - - /* idle_balance() stats */ - u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; - - u64 avg_scan_cost; /* select_idle_sibling */ - -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; - struct sched_domain_shared *shared; - - unsigned int span_weight; - /* - * Span of all CPUs in this domain. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long span[0]; -}; - -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - -/* Allocate an array of sched domains, for partition_sched_domains(). */ -cpumask_var_t *alloc_sched_domains(unsigned int ndoms); -void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); - -bool cpus_share_cache(int this_cpu, int that_cpu); - -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef int (*sched_domain_flags_f)(void); - -#define SDTL_OVERLAP 0x01 - -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_domain_shared **__percpu sds; - struct sched_group **__percpu sg; - struct sched_group_capacity **__percpu sgc; -}; - -struct sched_domain_topology_level { - sched_domain_mask_f mask; - sched_domain_flags_f sd_flags; - int flags; - int numa_level; - struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif -}; - -extern void set_sched_topology(struct sched_domain_topology_level *tl); -extern void wake_up_if_idle(int cpu); - -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif - -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} - -#endif /* !CONFIG_SMP */ - - -struct io_context; /* See blkdev.h */ - - -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } -#endif - -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) struct load_weight { - unsigned long weight; - u32 inv_weight; + unsigned long weight; + u32 inv_weight; }; /* @@ -1300,71 +312,73 @@ struct load_weight { * issues. */ struct sched_avg { - u64 last_update_time, load_sum; - u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; + u64 last_update_time; + u64 load_sum; + u32 util_sum; + u32 period_contrib; + unsigned long load_avg; + unsigned long util_avg; }; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics { - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - u64 iowait_count; - u64 iowait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations_cold; - u64 nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; -}; +#ifdef CONFIG_SCHEDSTATS + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; + + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; + u64 block_max; + u64 exec_max; + u64 slice_max; + + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; #endif +}; struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; + /* For load-balancing: */ + struct load_weight load; + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; - u64 nr_migrations; + u64 nr_migrations; -#ifdef CONFIG_SCHEDSTATS - struct sched_statistics statistics; -#endif + struct sched_statistics statistics; #ifdef CONFIG_FAIR_GROUP_SCHED - int depth; - struct sched_entity *parent; + int depth; + struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; + struct cfs_rq *my_q; #endif #ifdef CONFIG_SMP @@ -1374,49 +388,49 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. */ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg ____cacheline_aligned_in_smp; #endif }; struct sched_rt_entity { - struct list_head run_list; - unsigned long timeout; - unsigned long watchdog_stamp; - unsigned int time_slice; - unsigned short on_rq; - unsigned short on_list; - - struct sched_rt_entity *back; + struct list_head run_list; + unsigned long timeout; + unsigned long watchdog_stamp; + unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; + + struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; + struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; + struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; + struct rt_rq *my_q; #endif }; struct sched_dl_entity { - struct rb_node rb_node; + struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ - u64 dl_runtime; /* maximum runtime for each instance */ - u64 dl_deadline; /* relative deadline of each instance */ - u64 dl_period; /* separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ + u64 dl_runtime; /* Maximum runtime for each instance */ + u64 dl_deadline; /* Relative deadline of each instance */ + u64 dl_period; /* Separation of two instances (period) */ + u64 dl_bw; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ - s64 runtime; /* remaining runtime for this instance */ - u64 deadline; /* absolute deadline for this instance */ - unsigned int flags; /* specifying the scheduler behaviour */ + s64 runtime; /* Remaining runtime for this instance */ + u64 deadline; /* Absolute deadline for this instance */ + unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: @@ -1429,28 +443,31 @@ struct sched_dl_entity { * outside bandwidth enforcement mechanism (but only until we * exit the critical section); * - * @dl_yielded tells if task gave up the cpu before consuming + * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_boosted, dl_yielded; + int dl_throttled; + int dl_boosted; + int dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ - struct hrtimer dl_timer; + struct hrtimer dl_timer; }; union rcu_special { struct { - u8 blocked; - u8 need_qs; - u8 exp_need_qs; - u8 pad; /* Otherwise the compiler can store garbage here. */ + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + + /* Otherwise the compiler can store garbage here: */ + u8 pad; } b; /* Bits. */ u32 s; /* Set of bits. */ }; -struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, @@ -1459,23 +476,8 @@ enum perf_event_task_context { perf_nr_task_contexts, }; -/* Track pages that require TLB flushes */ -struct tlbflush_unmap_batch { - /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending(). - */ - struct cpumask cpumask; - - /* True if any bit in cpumask is set */ - bool flush_required; - - /* - * If true then the PTE was dirty when unmapped. The entry must be - * flushed before IO is initiated or a stale TLB entry potentially - * allows an update without redirtying the page. - */ - bool writable; +struct wake_q_node { + struct wake_q_node *next; }; struct task_struct { @@ -1484,362 +486,417 @@ struct task_struct { * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. */ - struct thread_info thread_info; + struct thread_info thread_info; #endif - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; + void *stack; + atomic_t usage; + /* Per task flags (PF_*), defined further below: */ + unsigned int flags; + unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - int on_cpu; + struct llist_node wake_entry; + int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK - unsigned int cpu; /* current CPU */ + /* Current CPU: */ + unsigned int cpu; #endif - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; - int wake_cpu; + int wake_cpu; #endif - int on_rq; + int on_rq; - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; + int prio; + int static_prio; + int normal_prio; + unsigned int rt_priority; + + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; + struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; + struct sched_dl_entity dl; #ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; + /* List of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; + unsigned int btrace_seq; #endif - unsigned int policy; - int nr_cpus_allowed; - cpumask_t cpus_allowed; + unsigned int policy; + int nr_cpus_allowed; + cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - union rcu_special rcu_read_unlock_special; - struct list_head rcu_node_entry; - struct rcu_node *rcu_blocked_node; + int rcu_read_lock_nesting; + union rcu_special rcu_read_unlock_special; + struct list_head rcu_node_entry; + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ + #ifdef CONFIG_TASKS_RCU - unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; - int rcu_tasks_idle_cpu; + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#ifdef CONFIG_SCHED_INFO - struct sched_info sched_info; -#endif + struct sched_info sched_info; - struct list_head tasks; + struct list_head tasks; #ifdef CONFIG_SMP - struct plist_node pushable_tasks; - struct rb_node pushable_dl_tasks; -#endif - - struct mm_struct *mm, *active_mm; - /* per-thread vma caching */ - u32 vmacache_seqnum; - struct vm_area_struct *vmacache[VMACACHE_SIZE]; -#if defined(SPLIT_RSS_COUNTING) - struct task_rss_stat rss_stat; -#endif -/* task state */ - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - unsigned long jobctl; /* JOBCTL_*, siglock protected */ - - /* Used for emulating ABI behavior of previous Linux versions */ - unsigned int personality; - - /* scheduler bits, serialized by scheduler locks */ - unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; - unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; - unsigned :0; /* force alignment to the next boundary */ - - /* unserialized, strictly 'current' */ - unsigned in_execve:1; /* bit to tell LSMs we're in execve */ - unsigned in_iowait:1; -#if !defined(TIF_RESTORE_SIGMASK) - unsigned restore_sigmask:1; + struct plist_node pushable_tasks; + struct rb_node pushable_dl_tasks; +#endif + + struct mm_struct *mm; + struct mm_struct *active_mm; + + /* Per-thread vma caching: */ + struct vmacache vmacache; + +#ifdef SPLIT_RSS_COUNTING + struct task_rss_stat rss_stat; +#endif + int exit_state; + int exit_code; + int exit_signal; + /* The signal sent when the parent dies: */ + int pdeath_signal; + /* JOBCTL_*, siglock protected: */ + unsigned long jobctl; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; + + /* Scheduler bits, serialized by scheduler locks: */ + unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; + /* Force alignment to the next boundary: */ + unsigned :0; + + /* Unserialized, strictly 'current' */ + + /* Bit to tell LSMs we're in execve(): */ + unsigned in_execve:1; + unsigned in_iowait:1; +#ifndef TIF_RESTORE_SIGMASK + unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG - unsigned memcg_may_oom:1; + unsigned memcg_may_oom:1; #ifndef CONFIG_SLOB - unsigned memcg_kmem_skip_account:1; + unsigned memcg_kmem_skip_account:1; #endif #endif #ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; + unsigned brk_randomized:1; #endif - unsigned long atomic_flags; /* Flags needing atomic access. */ + unsigned long atomic_flags; /* Flags requiring atomic access. */ - struct restart_block restart_block; + struct restart_block restart_block; - pid_t pid; - pid_t tgid; + pid_t pid; + pid_t tgid; #ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; + /* Canary value for the -fstack-protector GCC feature: */ + unsigned long stack_canary; #endif /* - * pointers to (original) parent process, youngest child, younger sibling, + * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct __rcu *real_parent; /* real parent process */ - struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ + + /* Real parent process: */ + struct task_struct __rcu *real_parent; + + /* Recipient of SIGCHLD, wait4() reports: */ + struct task_struct __rcu *parent; + /* - * children/sibling forms the list of my natural children + * Children/sibling form the list of natural children: */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; /* - * ptraced is the list of tasks this task is using ptrace on. + * 'ptraced' is the list of tasks this task is using ptrace() on. + * * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. + * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ - struct list_head ptraced; - struct list_head ptrace_entry; + struct list_head ptraced; + struct list_head ptrace_entry; /* PID/PID hash table linkage. */ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - struct list_head thread_node; + struct pid_link pids[PIDTYPE_MAX]; + struct list_head thread_group; + struct list_head thread_node; + + struct completion *vfork_done; + + /* CLONE_CHILD_SETTID: */ + int __user *set_child_tid; - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + /* CLONE_CHILD_CLEARTID: */ + int __user *clear_child_tid; - u64 utime, stime; + u64 utime; + u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled, stimescaled; + u64 utimescaled; + u64 stimescaled; #endif - u64 gtime; - struct prev_cputime prev_cputime; + u64 gtime; + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + seqcount_t vtime_seqcount; + unsigned long long vtime_snap; enum { - /* Task is sleeping or running in a CPU with VTIME inactive */ + /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active */ + /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active */ + /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, } vtime_snap_whence; #endif #ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; + atomic_t tick_dep_mask; #endif - unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; /* monotonic time in nsec */ - u64 real_start_time; /* boot based time in nsec */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; + /* Context switch counts: */ + unsigned long nvcsw; + unsigned long nivcsw; + + /* Monotonic time in nsecs: */ + u64 start_time; + + /* Boot based time in nsecs: */ + u64 real_start_time; + + /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ + unsigned long min_flt; + unsigned long maj_flt; #ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; -#endif - -/* process credentials */ - const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ - const struct cred __rcu *real_cred; /* objective and real subjective task - * credentials (COW) */ - const struct cred __rcu *cred; /* effective (overridable) subjective task - * credentials (COW) */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by setup_new_exec */ -/* file system info */ - struct nameidata *nameidata; + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; +#endif + + /* Process credentials: */ + + /* Tracer's credentials at attach: */ + const struct cred __rcu *ptracer_cred; + + /* Objective and real subjective task credentials (COW): */ + const struct cred __rcu *real_cred; + + /* Effective (overridable) subjective task credentials (COW): */ + const struct cred __rcu *cred; + + /* + * executable name, excluding path. + * + * - normally initialized setup_new_exec() + * - access it with [gs]et_task_comm() + * - lock it with task_lock() + */ + char comm[TASK_COMM_LEN]; + + struct nameidata *nameidata; + #ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; - struct sysv_shm sysvshm; + struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK -/* hung task detection */ - unsigned long last_switch_count; -#endif -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - unsigned sas_ss_flags; - - struct callback_head *task_works; - - struct audit_context *audit_context; + unsigned long last_switch_count; +#endif + /* Filesystem information: */ + struct fs_struct *fs; + + /* Open file information: */ + struct files_struct *files; + + /* Namespaces: */ + struct nsproxy *nsproxy; + + /* Signal handlers: */ + struct signal_struct *signal; + struct sighand_struct *sighand; + sigset_t blocked; + sigset_t real_blocked; + /* Restored if set_restore_sigmask() was used: */ + sigset_t saved_sigmask; + struct sigpending pending; + unsigned long sas_ss_sp; + size_t sas_ss_size; + unsigned int sas_ss_flags; + + struct callback_head *task_works; + + struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - kuid_t loginuid; - unsigned int sessionid; + kuid_t loginuid; + unsigned int sessionid; #endif - struct seccomp seccomp; + struct seccomp seccomp; -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, - * mempolicy */ - spinlock_t alloc_lock; + /* Thread group tracking: */ + u32 parent_exec_id; + u32 self_exec_id; + + /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ + spinlock_t alloc_lock; /* Protection of the PI data structures: */ - raw_spinlock_t pi_lock; + raw_spinlock_t pi_lock; - struct wake_q_node wake_q; + struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; + /* PI waiters blocked on a rt_mutex held by this task: */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; + /* Deadlock detection and priority inheritance handling: */ + struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; + /* Mutex deadlock detection: */ + struct mutex_waiter *blocked_on; #endif + #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; - int softirqs_enabled; - int softirq_context; + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + int hardirqs_enabled; + int hardirq_context; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; + int softirqs_enabled; + int softirq_context; #endif + #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; +# define MAX_LOCK_DEPTH 48UL + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; #endif + #ifdef CONFIG_UBSAN - unsigned int in_ubsan; + unsigned int in_ubsan; #endif -/* journalling filesystem info */ - void *journal_info; + /* Journalling filesystem info: */ + void *journal_info; -/* stacked block device info */ - struct bio_list *bio_list; + /* Stacked block device info: */ + struct bio_list *bio_list; #ifdef CONFIG_BLOCK -/* stack plugging */ - struct blk_plug *plug; + /* Stack plugging: */ + struct blk_plug *plug; #endif -/* VM state */ - struct reclaim_state *reclaim_state; + /* VM state: */ + struct reclaim_state *reclaim_state; - struct backing_dev_info *backing_dev_info; + struct backing_dev_info *backing_dev_info; - struct io_context *io_context; + struct io_context *io_context; - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - u64 acct_timexpd; /* stime + utime since last update */ + /* Ptrace state: */ + unsigned long ptrace_message; + siginfo_t *last_siginfo; + + struct task_io_accounting ioac; +#ifdef CONFIG_TASK_XACCT + /* Accumulated RSS usage: */ + u64 acct_rss_mem1; + /* Accumulated virtual memory usage: */ + u64 acct_vm_mem1; + /* stime + utime since last update: */ + u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; /* Protected by alloc_lock */ - seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ - int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; + /* Protected by ->alloc_lock: */ + nodemask_t mems_allowed; + /* Seqence number to catch updates: */ + seqcount_t mems_allowed_seq; + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set __rcu *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; + /* Control Group info protected by css_set_lock: */ + struct css_set __rcu *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock: */ + struct list_head cg_list; #endif #ifdef CONFIG_INTEL_RDT_A - int closid; + int closid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; - struct mutex perf_event_mutex; - struct list_head perf_event_list; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT - unsigned long preempt_disable_ip; + unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA - struct mempolicy *mempolicy; /* Protected by alloc_lock */ - short il_next; - short pref_node_fork; + /* Protected by alloc_lock: */ + struct mempolicy *mempolicy; + short il_next; + short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING - int numa_scan_seq; - unsigned int numa_scan_period; - unsigned int numa_scan_period_max; - int numa_preferred_nid; - unsigned long numa_migrate_retry; - u64 node_stamp; /* migration stamp */ - u64 last_task_numa_placement; - u64 last_sum_exec_runtime; - struct callback_head numa_work; - - struct list_head numa_entry; - struct numa_group *numa_group; + int numa_scan_seq; + unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + unsigned long numa_migrate_retry; + /* Migration stamp: */ + u64 node_stamp; + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; + struct callback_head numa_work; + + struct list_head numa_entry; + struct numa_group *numa_group; /* * numa_faults is an array split into four regions: @@ -1855,8 +912,8 @@ struct task_struct { * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ - unsigned long *numa_faults; - unsigned long total_numa_faults; + unsigned long *numa_faults; + unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last @@ -1864,208 +921,133 @@ struct task_struct { * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[3]; + unsigned long numa_faults_locality[3]; - unsigned long numa_pages_migrated; + unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - struct tlbflush_unmap_batch tlb_ubc; -#endif + struct tlbflush_unmap_batch tlb_ubc; - struct rcu_head rcu; + struct rcu_head rcu; - /* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; + /* Cache last used pipe for splice(): */ + struct pipe_inode_info *splice_pipe; - struct page_frag task_frag; + struct page_frag task_frag; -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; #endif + #ifdef CONFIG_FAULT_INJECTION - int make_it_fail; + int make_it_fail; #endif /* - * when (nr_dirtied >= nr_dirtied_pause), it's time to call - * balance_dirty_pages() for some dirty throttling pause + * When (nr_dirtied >= nr_dirtied_pause), it's time to call + * balance_dirty_pages() for a dirty throttling pause: */ - int nr_dirtied; - int nr_dirtied_pause; - unsigned long dirty_paused_when; /* start of a write-and-pause period */ + int nr_dirtied; + int nr_dirtied_pause; + /* Start of a write-and-pause period: */ + unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; #endif /* - * time slack values; these are used to round up poll() and + * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ - u64 timer_slack_ns; - u64 default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN - unsigned int kasan_depth; + unsigned int kasan_depth; #endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored address in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* time stamp for last schedule */ - unsigned long long ftrace_timestamp; + /* Index of current stored address in ret_stack: */ + int curr_ret_stack; + + /* Stack of return addresses for return function tracing: */ + struct ftrace_ret_stack *ret_stack; + + /* Timestamp for last schedule: */ + unsigned long long ftrace_timestamp; + /* * Number of functions that haven't been traced - * because of depth overrun. + * because of depth overrun: */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; + atomic_t trace_overrun; + + /* Pause tracing: */ + atomic_t tracing_graph_pause; #endif + #ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; - /* bitmask and counter of trace recursion */ - unsigned long trace_recursion; + /* State flags for use by tracers: */ + unsigned long trace; + + /* Bitmask and counter of trace recursion: */ + unsigned long trace_recursion; #endif /* CONFIG_TRACING */ + #ifdef CONFIG_KCOV - /* Coverage collection mode enabled for this task (0 if disabled). */ - enum kcov_mode kcov_mode; - /* Size of the kcov_area. */ - unsigned kcov_size; - /* Buffer for coverage collection. */ - void *kcov_area; - /* kcov desciptor wired with this task or NULL. */ - struct kcov *kcov; + /* Coverage collection mode enabled for this task (0 if disabled): */ + enum kcov_mode kcov_mode; + + /* Size of the kcov_area: */ + unsigned int kcov_size; + + /* Buffer for coverage collection: */ + void *kcov_area; + + /* KCOV descriptor wired with this task or NULL: */ + struct kcov *kcov; #endif + #ifdef CONFIG_MEMCG - struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; - /* number of pages to reclaim on returning to userland */ - unsigned int memcg_nr_pages_over_high; + /* Number of pages to reclaim on returning to userland: */ + unsigned int memcg_nr_pages_over_high; #endif + #ifdef CONFIG_UPROBES - struct uprobe_task *utask; + struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) - unsigned int sequential_io; - unsigned int sequential_io_avg; + unsigned int sequential_io; + unsigned int sequential_io_avg; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; + unsigned long task_state_change; #endif - int pagefault_disabled; + int pagefault_disabled; #ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; + struct task_struct *oom_reaper_list; #endif #ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; + struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - /* A live task holds one reference. */ - atomic_t stack_refcount; + /* A live task holds one reference: */ + atomic_t stack_refcount; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! - */ -}; - -#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -extern int arch_task_struct_size __read_mostly; -#else -# define arch_task_struct_size (sizeof(struct task_struct)) -#endif - -#ifdef CONFIG_VMAP_STACK -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return t->stack_vm_area; -} -#else -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return NULL; -} -#endif - -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - -#define TNF_MIGRATED 0x01 -#define TNF_NO_GROUP 0x02 -#define TNF_SHARED 0x04 -#define TNF_FAULT_LOCAL 0x08 -#define TNF_MIGRATE_FAIL 0x10 - -static inline bool in_vfork(struct task_struct *tsk) -{ - bool ret; + /* CPU-specific state of this task: */ + struct thread_struct thread; /* - * need RCU to access ->real_parent if CLONE_VM was used along with - * CLONE_PARENT. - * - * We check real_parent->mm == tsk->mm because CLONE_VFORK does not - * imply CLONE_VM - * - * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus - * ->real_parent is not necessarily the task doing vfork(), so in - * theory we can't rely on task_lock() if we want to dereference it. + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. * - * And in this case we can't trust the real_parent->mm == tsk->mm - * check, it can be false negative. But we do not care, if init or - * another oom-unkillable task does this it should blame itself. + * Do not put anything below here! */ - rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; - rcu_read_unlock(); - - return ret; -} - -#ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); -extern pid_t task_numa_group_id(struct task_struct *p); -extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); -#else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) -{ -} -static inline pid_t task_numa_group_id(struct task_struct *p) -{ - return 0; -} -static inline void set_numabalancing_state(bool enabled) -{ -} -static inline void task_numa_free(struct task_struct *p) -{ -} -static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) -{ - return true; -} -#endif +}; static inline struct pid *task_pid(struct task_struct *task) { @@ -2078,7 +1060,7 @@ static inline struct pid *task_tgid(struct task_struct *task) } /* - * Without tasklist or rcu lock it is not safe to dereference + * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ @@ -2092,8 +1074,6 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } -struct pid_namespace; - /* * the helpers to get the task's different pids as they are seen * from various namespaces @@ -2107,16 +1087,14 @@ struct pid_namespace; * * see also pid_nr() etc in include/linux/pid.h */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, - struct pid_namespace *ns); +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } @@ -2132,15 +1110,28 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); +extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return pid_vnr(task_tgid(tsk)); } +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->pids[PIDTYPE_PID].pid != NULL; +} -static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; @@ -2158,8 +1149,7 @@ static inline pid_t task_ppid_nr(const struct task_struct *tsk) return task_ppid_nr_ns(tsk, &init_pid_ns); } -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } @@ -2170,8 +1160,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } @@ -2181,28 +1170,13 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } -/* obsolete, do not use */ +/* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } /** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - -/** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. * @tsk: Task structure to be checked. @@ -2218,89 +1192,37 @@ static inline int is_global_init(struct task_struct *tsk) extern struct pid *cad_pid; -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime); -extern u64 task_gtime(struct task_struct *t); -#else -static inline void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime) -{ - *utime = t->utime; - *stime = t->stime; -} - -static inline u64 task_gtime(struct task_struct *t) -{ - return t->gtime; -} -#endif - -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - *utimescaled = t->utimescaled; - *stimescaled = t->stimescaled; -} -#else -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - task_cputime(t, utimescaled, stimescaled); -} -#endif - -extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - /* * Per process flags */ -#define PF_IDLE 0x00000002 /* I am an IDLE thread */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ -#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* Dumped core */ +#define PF_SIGNALED 0x00000400 /* Killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -2313,55 +1235,38 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *s * child is not running and in turn not changing child->flags * at the same time the parent does it. */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) +#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) +#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) +#define clear_used_math() clear_stopped_child_used_math(current) +#define set_used_math() set_stopped_child_used_math(current) + #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) -#define copy_to_stopped_child_used_math(child) \ - do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) -/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. - */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) -{ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - return flags; -} +#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) -static inline unsigned int memalloc_noio_save(void) -{ - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; -} +#define copy_to_stopped_child_used_math(child) \ + do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) -static inline void memalloc_noio_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; -} +/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ +#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) +#define used_math() tsk_used_math(current) /* Per-process atomic flags. */ -#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ -#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } @@ -2380,75 +1285,23 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -/* - * task->jobctl flags - */ -#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ - -#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ -#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ -#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ -#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ -#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ -#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ -#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - -#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - -#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - -extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned long mask); -extern void task_clear_jobctl_trapping(struct task_struct *task); -extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned long mask); - -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - -static inline void tsk_restore_flags(struct task_struct *task, - unsigned long orig_flags, unsigned long flags) +static inline void +tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { task->flags &= ~flags; task->flags |= orig_flags & flags; } -extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); +extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); #ifdef CONFIG_SMP -extern void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask); - -extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); +extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else -static inline void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask) +static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; @@ -2456,165 +1309,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); -#else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ_COMMON */ - #ifndef cpu_relax_yield #define cpu_relax_yield() cpu_relax() #endif -/* - * Do not use outside of architecture code which knows its limitations. - * - * sched_clock() has no promise of monotonicity or bounded drift between - * CPUs, use (which you should not) requires disabling IRQs. - * - * Please use one of the three interfaces below. - */ -extern unsigned long long notrace sched_clock(void); -/* - * See the comment in kernel/sched/clock.c - */ -extern u64 running_clock(void); -extern u64 sched_clock_cpu(int cpu); - - -extern void sched_clock_init(void); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} - -static inline void sched_clock_tick(void) -{ -} - -static inline void clear_sched_clock_stable(void) -{ -} - -static inline void sched_clock_idle_sleep_event(void) -{ -} - -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} - -static inline u64 cpu_clock(int cpu) -{ - return sched_clock(); -} - -static inline u64 local_clock(void) -{ - return sched_clock(); -} -#else -extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ -extern int sched_clock_stable(void); -extern void clear_sched_clock_stable(void); - -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -/* - * As outlined in clock.c, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! # - * #################################################################### - */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} - -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} -#endif - -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * An i/f to runtime opt-in for irq time accounting based off of sched_clock. - * The reason for this explicit opt-in is not to have perf penalty with - * slow sched_clocks. - */ -extern void enable_sched_clock_irqtime(void); -extern void disable_sched_clock_irqtime(void); -#else -static inline void enable_sched_clock_irqtime(void) {} -static inline void disable_sched_clock_irqtime(void) {} -#endif - -extern unsigned long long -task_sched_runtime(struct task_struct *task); - -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} -#endif - -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} -#endif - -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -extern void wake_up_nohz_cpu(int cpu); -#else -static inline void wake_up_nohz_cpu(int cpu) { } -#endif - -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); -#endif - -#ifdef CONFIG_SCHED_AUTOGROUP -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -extern void sched_autogroup_exit_task(struct task_struct *p); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); -#endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -static inline void sched_autogroup_exit_task(struct task_struct *p) { } -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); + /** * task_nice - return the nice value of a given task. * @p: the task in question. @@ -2625,16 +1327,15 @@ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } + extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); -extern int sched_setattr(struct task_struct *, - const struct sched_attr *); +extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); +extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); + /** * is_idle_task - is the specified task an idle task? * @p: the task in question. @@ -2645,6 +1346,7 @@ static inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } + extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); @@ -2657,23 +1359,15 @@ union thread_union { unsigned long stack[THREAD_SIZE/sizeof(long)]; }; -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline struct thread_info *task_thread_info(struct task_struct *task) { - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); + return &task->thread_info; } +#elif !defined(__HAVE_THREAD_FUNCTIONS) +# define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -extern union thread_union init_thread_union; -extern struct task_struct init_task; - -extern struct mm_struct init_mm; - -extern struct pid_namespace init_pid_ns; - /* * find a task by one of its numerical ids * @@ -2686,365 +1380,25 @@ extern struct pid_namespace init_pid_ns; */ extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); - -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(kuid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); - -#include <asm/current.h> - -extern void xtime_update(unsigned long ticks); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); -#ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); -#else - static inline void kick_process(struct task_struct *tsk) { } -#endif -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_dead(struct task_struct *p); - -extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -} - -extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK -/* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. - */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} - -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ -static inline void set_restore_sigmask(void) -{ - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - current->restore_sigmask = false; -} -static inline bool test_restore_sigmask(void) -{ - return current->restore_sigmask; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; -} -#endif - -static inline void restore_saved_sigmask(void) -{ - if (test_and_clear_restore_sigmask()) - __set_current_blocked(¤t->saved_sigmask); -} - -static inline sigset_t *sigmask_to_save(void) -{ - sigset_t *res = ¤t->blocked; - if (unlikely(test_restore_sigmask())) - res = ¤t->saved_sigmask; - return res; -} - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) -{ - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway. - */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif -} - -static inline int sas_ss_flags(unsigned long sp) -{ - if (!current->sas_ss_size) - return SS_DISABLE; - - return on_sig_stack(sp) ? SS_ONSTACK : 0; -} - -static inline void sas_ss_reset(struct task_struct *p) -{ - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; -} - -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -{ - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; -} - -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also <Documentation/vm/active_mm.txt> for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_count); -} - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} - -/** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. - * - * Use mmput() to release the reference acquired by mmget(). - * - * See also <Documentation/vm/active_mm.txt> for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) -{ - return atomic_inc_not_zero(&mm->mm_users); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. - */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. */ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif -extern void flush_thread(void); -#ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(struct task_struct *tsk); +#ifdef CONFIG_SMP +extern void kick_process(struct task_struct *tsk); #else -static inline void exit_thread(struct task_struct *tsk) -{ -} +static inline void kick_process(struct task_struct *tsk) { } #endif -extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); - -extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); - -extern void do_group_exit(int); - -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); + static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } + extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP @@ -3052,263 +1406,15 @@ void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { return 1; } #endif -#define tasklist_empty() \ - list_empty(&init_task.tasks) - -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. - */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); - -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} - -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} - -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; -} - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - -/** - * threadgroup_change_begin - mark the beginning of changes to a threadgroup - * @tsk: task causing the changes - * - * All operations which modify a threadgroup - a new thread joining the - * group, death of a member thread (the assertion of PF_EXITING) and - * exec(2) dethreading the process and replacing the leader - are wrapped - * by threadgroup_change_{begin|end}(). This is to provide a place which - * subsystems needing threadgroup stability can hook into for - * synchronization. - */ -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - might_sleep(); - cgroup_threadgroup_change_begin(tsk); -} - -/** - * threadgroup_change_end - mark the end of changes to a threadgroup - * @tsk: task causing the changes - * - * See threadgroup_change_begin(). - */ -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - cgroup_threadgroup_change_end(tsk); -} - -#ifdef CONFIG_THREAD_INFO_IN_TASK - -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} - /* - * When accessing the stack of a non-current task that might exit, use - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ -static inline void *task_stack_page(const struct task_struct *task) -{ - return task->stack; -} - -#define setup_thread_stack(new,old) do { } while(0) - -static inline unsigned long *end_of_stack(const struct task_struct *task) -{ - return task->stack; -} - -#elif !defined(__HAVE_THREAD_FUNCTIONS) - -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((void *)(task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} - -/* - * Return the address of the last usable long on the stack. - * - * When the stack grows down, this is just above the thread - * info struct. Going any lower will corrupt the threadinfo. - * - * When the stack grows up, this is the highest address. - * Beyond that position, we corrupt data on the next page. - */ -static inline unsigned long *end_of_stack(struct task_struct *p) -{ -#ifdef CONFIG_STACK_GROWSUP - return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -#else - return (unsigned long *)(task_thread_info(p) + 1); -#endif -} - -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return atomic_inc_not_zero(&tsk->stack_refcount) ? - task_stack_page(tsk) : NULL; -} - -extern void put_task_stack(struct task_struct *tsk); -#else -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return task_stack_page(tsk); -} - -static inline void put_task_stack(struct task_struct *tsk) {} -#endif - -#define task_stack_end_corrupted(task) \ - (*(end_of_stack(task)) != STACK_END_MAGIC) - -static inline int object_is_on_stack(void *obj) -{ - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); -} - -extern void thread_stack_cache_init(void); - -#ifdef CONFIG_DEBUG_STACK_USAGE -static inline unsigned long stack_not_used(struct task_struct *p) -{ - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif -} -#endif -extern void set_task_stack_end_magic(struct task_struct *tsk); - -/* set thread flags in other task's structures - * - see asm/thread_info.h for TIF_xxxx flags available + * Set thread flags in other task's structures. + * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { @@ -3350,37 +1456,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -static inline int restart_syscall(void) -{ - set_tsk_thread_flag(current, TIF_SIGPENDING); - return -ERESTARTNOINTR; -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -static inline int __fatal_signal_pending(struct task_struct *p) -{ - return unlikely(sigismember(&p->pending.signal, SIGKILL)); -} - -static inline int fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -3422,15 +1497,6 @@ static inline void cond_resched_rcu(void) #endif } -static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -{ -#ifdef CONFIG_DEBUG_PREEMPT - return p->preempt_disable_ip; -#else - return 0; -#endif -} - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, @@ -3445,114 +1511,12 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } -/* - * Idle thread specific functions to determine the need_resched - * polling state. - */ -#ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} - -static inline void __current_set_polling(void) -{ - set_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -#else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void __current_set_polling(void) { } -static inline void __current_clr_polling(void) { } - -static inline bool __must_check current_set_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -static inline bool __must_check current_clr_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -#endif - -static inline void current_clr_polling(void) -{ - __current_clr_polling(); - - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. - * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ - smp_mb(); /* paired with resched_curr() */ - - preempt_fold_need_resched(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); } /* - * Thread group CPU time accounting. - */ -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - -static inline void signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -} -static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -} - -/* * Wrappers for p->thread_info->cpu access. No-op on UP. */ #ifdef CONFIG_SMP @@ -3566,11 +1530,6 @@ static inline unsigned int task_cpu(const struct task_struct *p) #endif } -static inline int task_node(const struct task_struct *p) -{ - return cpu_to_node(task_cpu(p)); -} - extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else @@ -3601,100 +1560,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -#ifdef CONFIG_CGROUP_SCHED -extern struct task_group root_task_group; -#endif /* CONFIG_CGROUP_SCHED */ - -extern int task_can_switch_user(struct user_struct *up, - struct task_struct *tsk); - -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MEMCG -extern void mm_update_next_owner(struct mm_struct *mm); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} -#endif /* CONFIG_MEMCG */ - -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) - -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - -#ifdef CONFIG_CPU_FREQ -struct update_util_data { - void (*func)(struct update_util_data *data, u64 time, unsigned int flags); -}; - -void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned int flags)); -void cpufreq_remove_update_util_hook(int cpu); -#endif /* CONFIG_CPU_FREQ */ - #endif diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h new file mode 100644 index 000000000000..55cd496df884 --- /dev/null +++ b/include/linux/sched/autogroup.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_SCHED_AUTOGROUP_H +#define _LINUX_SCHED_AUTOGROUP_H + +struct signal_struct; +struct task_struct; +struct task_group; +struct seq_file; + +#ifdef CONFIG_SCHED_AUTOGROUP +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } +#endif + +#ifdef CONFIG_CGROUP_SCHED +extern struct task_group root_task_group; +#endif /* CONFIG_CGROUP_SCHED */ + +#endif /* _LINUX_SCHED_AUTOGROUP_H */ diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 000000000000..4a68c6791207 --- /dev/null +++ b/include/linux/sched/clock.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include <linux/smp.h> + +/* + * Do not use outside of architecture code which knows its limitations. + * + * sched_clock() has no promise of monotonicity or bounded drift between + * CPUs, use (which you should not) requires disabling IRQs. + * + * Please use one of the three interfaces below. + */ +extern unsigned long long notrace sched_clock(void); + +/* + * See the comment in kernel/sched/clock.c + */ +extern u64 running_clock(void); +extern u64 sched_clock_cpu(int cpu); + + +extern void sched_clock_init(void); + +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void clear_sched_clock_stable(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} +#else +extern void sched_clock_init_late(void); +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +extern int sched_clock_stable(void); +extern void clear_sched_clock_stable(void); + +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} +#endif + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. + */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h new file mode 100644 index 000000000000..69eedcef8f03 --- /dev/null +++ b/include/linux/sched/coredump.h @@ -0,0 +1,74 @@ +#ifndef _LINUX_SCHED_COREDUMP_H +#define _LINUX_SCHED_COREDUMP_H + +#include <linux/mm_types.h> + +#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ +#define SUID_DUMP_USER 1 /* Dump as user of process */ +#define SUID_DUMP_ROOT 2 /* Dump as root */ + +/* mm flags */ + +/* for SUID_DUMP_* above */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) + +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) + +#endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h new file mode 100644 index 000000000000..d2be2ccbb372 --- /dev/null +++ b/include/linux/sched/cpufreq.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_SCHED_CPUFREQ_H +#define _LINUX_SCHED_CPUFREQ_H + +#include <linux/types.h> + +/* + * Interface between cpufreq drivers and the scheduler: + */ + +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); +}; + +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); +void cpufreq_remove_update_util_hook(int cpu); +#endif /* CONFIG_CPU_FREQ */ + +#endif /* _LINUX_SCHED_CPUFREQ_H */ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h new file mode 100644 index 000000000000..4c5b9735c1ae --- /dev/null +++ b/include/linux/sched/cputime.h @@ -0,0 +1,187 @@ +#ifndef _LINUX_SCHED_CPUTIME_H +#define _LINUX_SCHED_CPUTIME_H + +#include <linux/sched/signal.h> + +/* + * cputime accounting APIs: + */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#include <asm/cputime.h> + +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime); +extern u64 task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime) +{ + *utime = t->utime; + *stime = t->stime; +} + +static inline u64 task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif + +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; +} +#else +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + task_cputime(t, utimescaled, stimescaled); +} +#endif + +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); + + +/* + * Thread group CPU time accounting. + */ +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); + + +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +/** + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running + * + * @tsk: Pointer to target task. + */ +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + + /* Check if cputimer isn't running. This is accessed without locking. */ + if (!READ_ONCE(cputimer->running)) + return NULL; + + /* + * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime + * in __exit_signal(), we won't account to the signal struct further + * cputime consumed by that task, even though the task can still be + * ticking after __exit_signal(). + * + * In order to keep a consistent behaviour between thread group cputime + * and thread group cputimer accounting, lets also ignore the cputime + * elapsing after __exit_signal() in any thread group timer running. + * + * This makes sure that POSIX CPU clocks and timers are synchronized, so + * that a POSIX CPU timer won't expire while the corresponding POSIX CPU + * clock delta is behind the expiring timer value. + */ + if (unlikely(!tsk->sighand)) + return NULL; + + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; +} +#endif + +/** + * account_group_user_time - Maintain utime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void account_group_user_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.utime); +} + +/** + * account_group_system_time - Maintain stime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void account_group_system_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.stime); +} + +/** + * account_group_exec_runtime - Maintain exec runtime for a thread group. + * + * @tsk: Pointer to task structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of the thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); +} + +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + +extern unsigned long long +task_sched_runtime(struct task_struct *task); + +#endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9089a2ae913d..975be862e083 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,5 +1,7 @@ -#ifndef _SCHED_DEADLINE_H -#define _SCHED_DEADLINE_H +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H + +#include <linux/sched.h> /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b) return (s64)(a - b) < 0; } -#endif /* _SCHED_DEADLINE_H */ +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h new file mode 100644 index 000000000000..e0eaee54c5a4 --- /dev/null +++ b/include/linux/sched/debug.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_DEBUG_H +#define _LINUX_SCHED_DEBUG_H + +/* + * Various scheduler/task debugging interfaces: + */ + +struct task_struct; + +extern void dump_cpu_task(int cpu); + +/* + * Only dump TASK_* tasks. (0 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(0); +} + +struct pt_regs; + +extern void show_regs(struct pt_regs *); + +/* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current + * task), SP is the stack pointer of the first frame that should be shown in the back + * trace (or NULL if the entire call-chain of the task should be shown). + */ +extern void show_stack(struct task_struct *task, unsigned long *sp); + +extern void sched_show_task(struct task_struct *p); + +#ifdef CONFIG_SCHED_DEBUG +struct seq_file; +extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); +extern void proc_sched_set_task(struct task_struct *p); +#endif + +/* Attach to any functions which should be ignored in wchan output. */ +#define __sched __attribute__((__section__(".sched.text"))) + +/* Linker adds these: start and end of __sched functions */ +extern char __sched_text_start[], __sched_text_end[]; + +/* Is this address in the __sched functions? */ +extern int in_sched_functions(unsigned long addr); + +#endif /* _LINUX_SCHED_DEBUG_H */ diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h new file mode 100644 index 000000000000..752ac7e628d7 --- /dev/null +++ b/include/linux/sched/hotplug.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_SCHED_HOTPLUG_H +#define _LINUX_SCHED_HOTPLUG_H + +/* + * Scheduler interfaces for hotplug CPU support: + */ + +extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + +#ifdef CONFIG_HOTPLUG_CPU +extern void idle_task_exit(void); +#else +static inline void idle_task_exit(void) {} +#endif + +#endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h new file mode 100644 index 000000000000..5ca63ebad6b4 --- /dev/null +++ b/include/linux/sched/idle.h @@ -0,0 +1,86 @@ +#ifndef _LINUX_SCHED_IDLE_H +#define _LINUX_SCHED_IDLE_H + +#include <linux/sched.h> + +enum cpu_idle_type { + CPU_IDLE, + CPU_NOT_IDLE, + CPU_NEWLY_IDLE, + CPU_MAX_IDLE_TYPES +}; + +extern void wake_up_if_idle(int cpu); + +/* + * Idle thread specific functions to determine the need_resched + * polling state. + */ +#ifdef TIF_POLLING_NRFLAG + +static inline void __current_set_polling(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) +{ + clear_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +#else +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +#endif + +static inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + +#endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h new file mode 100644 index 000000000000..127215045285 --- /dev/null +++ b/include/linux/sched/init.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_SCHED_INIT_H +#define _LINUX_SCHED_INIT_H + +/* + * Scheduler init related prototypes: + */ + +extern void sched_init(void); +extern void sched_init_smp(void); + +#endif /* _LINUX_SCHED_INIT_H */ diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h new file mode 100644 index 000000000000..016afa0fb3bb --- /dev/null +++ b/include/linux/sched/jobctl.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_SCHED_JOBCTL_H +#define _LINUX_SCHED_JOBCTL_H + +#include <linux/types.h> + +struct task_struct; + +/* + * task->jobctl flags + */ +#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ + +#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ +#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ +#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ +#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ +#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ +#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ +#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) + +#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); +extern void task_clear_jobctl_trapping(struct task_struct *task); +extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask); + +#endif /* _LINUX_SCHED_JOBCTL_H */ diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h new file mode 100644 index 000000000000..4264bc6b2c27 --- /dev/null +++ b/include/linux/sched/loadavg.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_SCHED_LOADAVG_H +#define _LINUX_SCHED_LOADAVG_H + +/* + * These are the constant used to fake the fixed-point load-average + * counting. Some notes: + * - 11 bit fractions expand to 22 bits by the multiplies: this gives + * a load-average precision of 10 bits integer + 11 bits fractional + * - if you want to count load-averages more often, you need more + * precision, or rounding will get you. With 2-second counting freq, + * the EXP_n values would be 1981, 2034 and 2043 if still using only + * 11 bit fractions. + */ +extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); + +#define FSHIFT 11 /* nr of bits of precision */ +#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ +#define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ +#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ +#define EXP_5 2014 /* 1/exp(5sec/5min) */ +#define EXP_15 2037 /* 1/exp(5sec/15min) */ + +#define CALC_LOAD(load,exp,n) \ + load *= exp; \ + load += n*(FIXED_1-exp); \ + load >>= FSHIFT; + +extern void calc_global_load(unsigned long ticks); + +#endif /* _LINUX_SCHED_LOADAVG_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h new file mode 100644 index 000000000000..830953ebb391 --- /dev/null +++ b/include/linux/sched/mm.h @@ -0,0 +1,174 @@ +#ifndef _LINUX_SCHED_MM_H +#define _LINUX_SCHED_MM_H + +#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/sched.h> +#include <linux/mm_types.h> +#include <linux/gfp.h> + +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also <Documentation/vm/active_mm.txt> for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + +/* mmdrop drops the mm and the page tables */ +extern void __mmdrop(struct mm_struct *); +static inline void mmdrop(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} + +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also <Documentation/vm/active_mm.txt> for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +#endif + +/* Grab a reference to a task's mm, if it is not already going away */ +extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(struct task_struct *, struct mm_struct *); + +#ifdef CONFIG_MEMCG +extern void mm_update_next_owner(struct mm_struct *mm); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} +#endif /* CONFIG_MEMCG */ + +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); +extern unsigned long +arch_get_unmapped_area(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long); +extern unsigned long +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif + +static inline bool in_vfork(struct task_struct *tsk) +{ + bool ret; + + /* + * need RCU to access ->real_parent if CLONE_VM was used along with + * CLONE_PARENT. + * + * We check real_parent->mm == tsk->mm because CLONE_VFORK does not + * imply CLONE_VM + * + * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus + * ->real_parent is not necessarily the task doing vfork(), so in + * theory we can't rely on task_lock() if we want to dereference it. + * + * And in this case we can't trust the real_parent->mm == tsk->mm + * check, it can be false negative. But we do not care, if init or + * another oom-unkillable task does this it should blame itself. + */ + rcu_read_lock(); + ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + rcu_read_unlock(); + + return ret; +} + +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. + */ +static inline gfp_t memalloc_noio_flags(gfp_t flags) +{ + if (unlikely(current->flags & PF_MEMALLOC_NOIO)) + flags &= ~(__GFP_IO | __GFP_FS); + return flags; +} + +static inline unsigned int memalloc_noio_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOIO; + current->flags |= PF_MEMALLOC_NOIO; + return flags; +} + +static inline void memalloc_noio_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; +} + +#endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h new file mode 100644 index 000000000000..4995b717500b --- /dev/null +++ b/include/linux/sched/nohz.h @@ -0,0 +1,43 @@ +#ifndef _LINUX_SCHED_NOHZ_H +#define _LINUX_SCHED_NOHZ_H + +/* + * This is the interface between the scheduler and nohz/dyntics: + */ + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); +#else +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } +#endif + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void nohz_balance_enter_idle(int cpu); +extern void set_cpu_sd_state_idle(void); +extern int get_nohz_timer_target(void); +#else +static inline void nohz_balance_enter_idle(int cpu) { } +static inline void set_cpu_sd_state_idle(void) { } +#endif + +#ifdef CONFIG_NO_HZ_COMMON +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ_COMMON */ + +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); +#else +static inline void wake_up_nohz_cpu(int cpu) { } +#endif + +#ifdef CONFIG_NO_HZ_FULL +extern u64 scheduler_tick_max_deferment(void); +#endif + +#endif /* _LINUX_SCHED_NOHZ_H */ diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h new file mode 100644 index 000000000000..35d5fc77b4be --- /dev/null +++ b/include/linux/sched/numa_balancing.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_SCHED_NUMA_BALANCING_H +#define _LINUX_SCHED_NUMA_BALANCING_H + +/* + * This is the interface between the scheduler and the MM that + * implements memory access pattern based NUMA-balancing: + */ + +#include <linux/sched.h> + +#define TNF_MIGRATED 0x01 +#define TNF_NO_GROUP 0x02 +#define TNF_SHARED 0x04 +#define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 + +#ifdef CONFIG_NUMA_BALANCING +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p); +extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); +#else +static inline void task_numa_fault(int last_node, int node, int pages, + int flags) +{ +} +static inline pid_t task_numa_group_id(struct task_struct *p) +{ + return 0; +} +static inline void set_numabalancing_state(bool enabled) +{ +} +static inline void task_numa_free(struct task_struct *p) +{ +} +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} +#endif + +#endif /* _LINUX_SCHED_NUMA_BALANCING_H */ diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index d9cf5a5762d9..2cc450f6ec54 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,5 +1,5 @@ -#ifndef _SCHED_PRIO_H -#define _SCHED_PRIO_H +#ifndef _LINUX_SCHED_PRIO_H +#define _LINUX_SCHED_PRIO_H #define MAX_NICE 19 #define MIN_NICE -20 @@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio) return (MAX_NICE - prio + 1); } -#endif /* _SCHED_PRIO_H */ +#endif /* _LINUX_SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a30b172df6e1..3bd668414f61 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,7 +1,9 @@ -#ifndef _SCHED_RT_H -#define _SCHED_RT_H +#ifndef _LINUX_SCHED_RT_H +#define _LINUX_SCHED_RT_H -#include <linux/sched/prio.h> +#include <linux/sched.h> + +struct task_struct; static inline int rt_prio(int prio) { @@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void); */ #define RR_TIMESLICE (100 * HZ / 1000) -#endif /* _SCHED_RT_H */ +#endif /* _LINUX_SCHED_RT_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 000000000000..2cf446704cd4 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,613 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include <linux/rculist.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/sched/jobctl.h> +#include <linux/sched/task.h> +#include <linux/cred.h> + +/* + * Types defining task->signal and task->sighand and APIs using them: + */ + +struct sighand_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; + wait_queue_head_t signalfd_wqh; +}; + +/* + * Per-process accounting stats: + */ +struct pacct_struct { + int ac_flag; + long ac_exitcode; + unsigned long ac_mem; + u64 ac_utime, ac_stime; + unsigned long ac_minflt, ac_majflt; +}; + +struct cpu_itimer { + u64 expires; + u64 incr; +}; + +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } +/** + * struct thread_group_cputimer - thread group interval timer counts + * @cputime_atomic: atomic thread group interval timers. + * @running: true when there are timers running and + * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU timer calculations. + */ +struct thread_group_cputimer { + struct task_cputime_atomic cputime_atomic; + bool running; + bool checking_timer; +}; + +/* + * NOTE! "signal_struct" does not have its own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. + */ +struct signal_struct { + atomic_t sigcnt; + atomic_t live; + int nr_threads; + struct list_head thread_head; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + + /* current thread group signal load-balancing target: */ + struct task_struct *curr_target; + + /* shared signal handling: */ + struct sigpending shared_pending; + + /* thread group exit support */ + int group_exit_code; + /* overloaded: + * - notify group_exit_task when ->count is equal to notify_count + * - everyone except group_exit_task is stopped during signal delivery + * of fatal signals, group_exit_task processes the signal. + */ + int notify_count; + struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; + unsigned int flags; /* see SIGNAL_* flags below */ + + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + +#ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ + int posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ + struct hrtimer real_timer; + ktime_t it_real_incr; + + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; + + /* + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. + */ + struct thread_group_cputimer cputimer; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; +#endif + + struct pid *tty_old_pgrp; + + /* boolean value for session group leader */ + int leader; + + struct tty_struct *tty; /* NULL if no tty */ + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. + */ + seqlock_t stats_lock; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; + struct prev_cputime prev_cputime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + unsigned long maxrss, cmaxrss; + struct task_io_accounting ioac; + + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, + * because there is no reader checking a limit that actually needs + * to get both rlim_cur and rlim_max atomically, and either one + * alone is a single word that can safely be read normally. + * getrlimit/setrlimit use task_lock(current->group_leader) to + * protect this instead of the siglock, because they really + * have no need to disable irqs. + */ + struct rlimit rlim[RLIM_NLIMITS]; + +#ifdef CONFIG_BSD_PROCESS_ACCT + struct pacct_struct pacct; /* per-process accounting information */ +#endif +#ifdef CONFIG_TASKSTATS + struct taskstats *stats; +#endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif + + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ +}; + +/* + * Bits in flags field of signal_struct. + */ +#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + +extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *, int force_default); +extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); + +static inline int kernel_dequeue_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + siginfo_t __info; + int ret; + + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); + + return ret; +} + +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sigsegv(int, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); +extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); +extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, + const struct cred *, u32); +extern int kill_pgrp(struct pid *pid, int sig, int priv); +extern int kill_pid(struct pid *pid, int sig, int priv); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern __must_check bool do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int zap_other_threads(struct task_struct *p); +extern struct sigqueue *sigqueue_alloc(void); +extern void sigqueue_free(struct sigqueue *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +static inline int restart_syscall(void) +{ + set_tsk_thread_flag(current, TIF_SIGPENDING); + return -ERESTARTNOINTR; +} + +static inline int signal_pending(struct task_struct *p) +{ + return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); +} + +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && __fatal_signal_pending(p); +} + +static inline int signal_pending_state(long state, struct task_struct *p) +{ + if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) + return 0; + if (!signal_pending(p)) + return 0; + + return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); +} + +/* + * Reevaluate whether the task has signals pending delivery. + * Wake the task if so. + * This is required every time the blocked sigset_t changes. + * callers must hold sighand->siglock. + */ +extern void recalc_sigpending_and_wake(struct task_struct *t); +extern void recalc_sigpending(void); + +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} + +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. + */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + __set_current_blocked(¤t->saved_sigmask); +} + +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = ¤t->blocked; + if (unlikely(test_restore_sigmask())) + res = ¤t->saved_sigmask; + return res; +} + +static inline int kill_cad_pid(int sig, int priv) +{ + return kill_pid(cad_pid, sig, priv); +} + +/* These can be the second arg to send_sig_info/send_group_sig_info. */ +#define SEND_SIG_NOINFO ((struct siginfo *) 0) +#define SEND_SIG_PRIV ((struct siginfo *) 1) +#define SEND_SIG_FORCED ((struct siginfo *) 2) + +/* + * True if we are on the alternate signal stack. + */ +static inline int on_sig_stack(unsigned long sp) +{ + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it. + * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif +} + +static inline int sas_ss_flags(unsigned long sp) +{ + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; +} + +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + +static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +{ + if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) +#ifdef CONFIG_STACK_GROWSUP + return current->sas_ss_sp; +#else + return current->sas_ss_sp + current->sas_ss_size; +#endif + return sp; +} + +extern void __cleanup_sighand(struct sighand_struct *); +extern void flush_itimer_signals(void); + +#define tasklist_empty() \ + list_empty(&init_task.tasks) + +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + +#define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +extern bool current_is_single_threaded(void); + +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. + */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) + +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + +static inline int get_nr_threads(struct task_struct *tsk) +{ + return tsk->signal->nr_threads; +} + +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} + +/* Do to the insanities of de_thread it is possible for a process + * to have the pid of the thread group leader without actually being + * the thread group leader. For iteration through the pids in proc + * all we care about is that we have a task with the appropriate + * pid, we don't actually care if we have the right task. + */ +static inline bool has_group_leader_pid(struct task_struct *p) +{ + return task_pid(p) == p->signal->leader_pid; +} + +static inline +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->signal == p2->signal; +} + +static inline struct task_struct *next_thread(const struct task_struct *p) +{ + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); +} + +static inline int thread_group_empty(struct task_struct *p) +{ + return list_empty(&p->thread_group); +} + +#define delay_group_leader(p) \ + (thread_group_leader(p) && !thread_group_empty(p)) + +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h new file mode 100644 index 000000000000..141b74c53fad --- /dev/null +++ b/include/linux/sched/stat.h @@ -0,0 +1,40 @@ +#ifndef _LINUX_SCHED_STAT_H +#define _LINUX_SCHED_STAT_H + +#include <linux/percpu.h> + +/* + * Various counters maintained by the scheduler and fork(), + * exposed via /proc, sys.c or used by drivers via these APIs. + * + * ( Note that all these values are aquired without locking, + * so they can only be relied on in narrow circumstances. ) + */ + +extern unsigned long total_forks; +extern int nr_threads; +DECLARE_PER_CPU(unsigned long, process_counts); +extern int nr_processes(void); +extern unsigned long nr_running(void); +extern bool single_task_running(void); +extern unsigned long nr_iowait(void); +extern unsigned long nr_iowait_cpu(int cpu); +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); + +static inline int sched_info_on(void) +{ +#ifdef CONFIG_SCHEDSTATS + return 1; +#elif defined(CONFIG_TASK_DELAY_ACCT) + extern int delayacct_on; + return delayacct_on; +#else + return 0; +#endif +} + +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + +#endif /* _LINUX_SCHED_STAT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 49308e142aae..0f5ecd4d298e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -1,5 +1,9 @@ -#ifndef _SCHED_SYSCTL_H -#define _SCHED_SYSCTL_H +#ifndef _LINUX_SCHED_SYSCTL_H +#define _LINUX_SCHED_SYSCTL_H + +#include <linux/types.h> + +struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK extern int sysctl_hung_task_check_count; @@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#endif /* _SCHED_SYSCTL_H */ +#endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 000000000000..a978d7189cfd --- /dev/null +++ b/include/linux/sched/task.h @@ -0,0 +1,139 @@ +#ifndef _LINUX_SCHED_TASK_H +#define _LINUX_SCHED_TASK_H + +/* + * Interface between the scheduler and various task lifetime (fork()/exit()) + * functionality: + */ + +#include <linux/sched.h> + +struct task_struct; +union thread_union; + +/* + * This serializes "schedule()" and also protects + * the run-queue from deletions/modifications (but + * _adding_ to the beginning of the run-queue has + * a separate lock). + */ +extern rwlock_t tasklist_lock; +extern spinlock_t mmlist_lock; + +extern union thread_union init_thread_union; +extern struct task_struct init_task; + +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + +extern asmlinkage void schedule_tail(struct task_struct *prev); +extern void init_idle(struct task_struct *idle, int cpu); +extern void init_idle_bootup_task(struct task_struct *idle); + +extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_dead(struct task_struct *p); + +void __noreturn do_task_dead(void); + +extern void proc_caches_init(void); + +extern void release_task(struct task_struct * p); + +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. */ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif +extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD +extern void exit_thread(struct task_struct *tsk); +#else +static inline void exit_thread(struct task_struct *tsk) +{ +} +#endif +extern void do_group_exit(int); + +extern void exit_files(struct task_struct *); +extern void exit_itimers(struct signal_struct *); + +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); +extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); +struct task_struct *fork_idle(int); +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +extern void free_task(struct task_struct *tsk); + +/* sched_exec is called by processes performing an exec */ +#ifdef CONFIG_SMP +extern void sched_exec(void); +#else +#define sched_exec() {} +#endif + +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +struct task_struct *task_rcu_dereference(struct task_struct **ptask); +struct task_struct *try_get_task_struct(struct task_struct **ptask); + + +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif + +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + +/* + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. And ->vfork_done. + * + * Nests both inside and outside of read_lock(&tasklist_lock). + * It must not be nested with write_lock_irq(&tasklist_lock), + * neither inside nor outside. + */ +static inline void task_lock(struct task_struct *p) +{ + spin_lock(&p->alloc_lock); +} + +static inline void task_unlock(struct task_struct *p) +{ + spin_unlock(&p->alloc_lock); +} + +#endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h new file mode 100644 index 000000000000..df6ea6665b31 --- /dev/null +++ b/include/linux/sched/task_stack.h @@ -0,0 +1,121 @@ +#ifndef _LINUX_SCHED_TASK_STACK_H +#define _LINUX_SCHED_TASK_STACK_H + +/* + * task->stack (kernel stack) handling interfaces: + */ + +#include <linux/sched.h> +#include <linux/magic.h> + +#ifdef CONFIG_THREAD_INFO_IN_TASK + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} + +#define setup_thread_stack(new,old) do { } while(0) + +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) + +#define task_stack_page(task) ((void *)(task)->stack) + +static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) +{ + *task_thread_info(p) = *task_thread_info(org); + task_thread_info(p)->task = p; +} + +/* + * Return the address of the last usable long on the stack. + * + * When the stack grows down, this is just above the thread + * info struct. Going any lower will corrupt the threadinfo. + * + * When the stack grows up, this is the highest address. + * Beyond that position, we corrupt data on the next page. + */ +static inline unsigned long *end_of_stack(struct task_struct *p) +{ +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; +#else + return (unsigned long *)(task_thread_info(p) + 1); +#endif +} + +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? + task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} +#endif + +#define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) + +static inline int object_is_on_stack(void *obj) +{ + void *stack = task_stack_page(current); + + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); +} + +extern void thread_stack_cache_init(void); + +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ +# ifdef CONFIG_STACK_GROWSUP + n--; +# else + n++; +# endif + } while (!*n); + +# ifdef CONFIG_STACK_GROWSUP + return (unsigned long)end_of_stack(p) - (unsigned long)n; +# else + return (unsigned long)n - (unsigned long)end_of_stack(p); +# endif +} +#endif +extern void set_task_stack_end_magic(struct task_struct *tsk); + +#ifndef __HAVE_ARCH_KSTACK_END +static inline int kstack_end(void *addr) +{ + /* Reliable end of stack detection: + * Some APM bios versions misalign the stack + */ + return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); +} +#endif + +#endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h new file mode 100644 index 000000000000..7d065abc7a47 --- /dev/null +++ b/include/linux/sched/topology.h @@ -0,0 +1,226 @@ +#ifndef _LINUX_SCHED_TOPOLOGY_H +#define _LINUX_SCHED_TOPOLOGY_H + +#include <linux/topology.h> + +#include <linux/sched/idle.h> + +/* + * sched-domains (multiprocessor balancing) declarations: + */ +#ifdef CONFIG_SMP + +#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ +#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ +#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ +#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ +#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ +#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +#define SD_NUMA 0x4000 /* cross-node balancing */ + +/* + * Increase resolution of cpu_capacity calculations + */ +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + +#ifdef CONFIG_SCHED_SMT +static inline int cpu_smt_flags(void) +{ + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_SCHED_MC +static inline int cpu_core_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_NUMA +static inline int cpu_numa_flags(void) +{ + return SD_NUMA; +} +#endif + +extern int arch_asym_cpu_priority(int cpu); + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + +extern int sched_domain_level_max; + +struct sched_group; + +struct sched_domain_shared { + atomic_t ref; + atomic_t nr_busy_cpus; + int has_idle_cores; +}; + +struct sched_domain { + /* These fields must be setup */ + struct sched_domain *parent; /* top domain must be null terminated */ + struct sched_domain *child; /* bottom domain must be null terminated */ + struct sched_group *groups; /* the balancing groups of the domain */ + unsigned long min_interval; /* Minimum balance interval ms */ + unsigned long max_interval; /* Maximum balance interval ms */ + unsigned int busy_factor; /* less balancing by factor if busy */ + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; + unsigned int forkexec_idx; + unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ + int flags; /* See SD_* */ + int level; + + /* Runtime fields. */ + unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned int balance_interval; /* initialise to 1. units in ms. */ + unsigned int nr_balance_failed; /* initialise to 0 */ + + /* idle_balance() stats */ + u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; + + u64 avg_scan_cost; /* select_idle_sibling */ + +#ifdef CONFIG_SCHEDSTATS + /* load_balance() stats */ + unsigned int lb_count[CPU_MAX_IDLE_TYPES]; + unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; + unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; + + /* Active load balancing */ + unsigned int alb_count; + unsigned int alb_failed; + unsigned int alb_pushed; + + /* SD_BALANCE_EXEC stats */ + unsigned int sbe_count; + unsigned int sbe_balanced; + unsigned int sbe_pushed; + + /* SD_BALANCE_FORK stats */ + unsigned int sbf_count; + unsigned int sbf_balanced; + unsigned int sbf_pushed; + + /* try_to_wake_up() stats */ + unsigned int ttwu_wake_remote; + unsigned int ttwu_move_affine; + unsigned int ttwu_move_balance; +#endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; + struct sched_domain_shared *shared; + + unsigned int span_weight; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long span[0]; +}; + +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return to_cpumask(sd->span); +} + +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new); + +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + +bool cpus_share_cache(int this_cpu, int that_cpu); + +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef int (*sched_domain_flags_f)(void); + +#define SDTL_OVERLAP 0x01 + +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; + struct sched_group **__percpu sg; + struct sched_group_capacity **__percpu sgc; +}; + +struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; + int flags; + int numa_level; + struct sd_data data; +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif +}; + +extern void set_sched_topology(struct sched_domain_topology_level *tl); + +#ifdef CONFIG_SCHED_DEBUG +# define SD_INIT_NAME(type) .name = #type +#else +# define SD_INIT_NAME(type) +#endif + +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new) +{ +} + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + +#endif /* !CONFIG_SMP */ + +static inline int task_node(const struct task_struct *p) +{ + return cpu_to_node(task_cpu(p)); +} + +#endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h new file mode 100644 index 000000000000..5d5415e129d4 --- /dev/null +++ b/include/linux/sched/user.h @@ -0,0 +1,61 @@ +#ifndef _LINUX_SCHED_USER_H +#define _LINUX_SCHED_USER_H + +#include <linux/uidgid.h> +#include <linux/atomic.h> + +struct key; + +/* + * Some day this will be a full-fledged user tracking system.. + */ +struct user_struct { + atomic_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t sigpending; /* How many pending signals does this user have? */ +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif +#ifdef CONFIG_EPOLL + atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ +#endif +#ifdef CONFIG_POSIX_MQUEUE + /* protected by mq_lock */ + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ +#endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ + atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ + +#ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ + struct key *session_keyring; /* UID's default session keyring */ +#endif + + /* Hash table maintenance information */ + struct hlist_node uidhash_node; + kuid_t uid; + +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) + atomic_long_t locked_vm; +#endif +}; + +extern int uids_sysfs_init(void); + +extern struct user_struct *find_user(kuid_t); + +extern struct user_struct root_user; +#define INIT_USER (&root_user) + + +/* per-UID process charging. */ +extern struct user_struct * alloc_uid(kuid_t); +static inline struct user_struct *get_uid(struct user_struct *u) +{ + atomic_inc(&u->__count); + return u; +} +extern void free_uid(struct user_struct *); + +#endif /* _LINUX_SCHED_USER_H */ diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h new file mode 100644 index 000000000000..d03d8a9047dc --- /dev/null +++ b/include/linux/sched/wake_q.h @@ -0,0 +1,53 @@ +#ifndef _LINUX_SCHED_WAKE_Q_H +#define _LINUX_SCHED_WAKE_Q_H + +/* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock as been + * released. + * + * We hold reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The DEFINE_WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. + */ + +#include <linux/sched.h> + +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define DEFINE_WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + +#endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h new file mode 100644 index 000000000000..a28156a0d34a --- /dev/null +++ b/include/linux/sched/xacct.h @@ -0,0 +1,48 @@ +#ifndef _LINUX_SCHED_XACCT_H +#define _LINUX_SCHED_XACCT_H + +/* + * Extended task accounting methods: + */ + +#include <linux/sched.h> + +#ifdef CONFIG_TASK_XACCT +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.rchar += amt; +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.wchar += amt; +} + +static inline void inc_syscr(struct task_struct *tsk) +{ + tsk->ioac.syscr++; +} + +static inline void inc_syscw(struct task_struct *tsk) +{ + tsk->ioac.syscw++; +} +#else +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void inc_syscr(struct task_struct *tsk) +{ +} + +static inline void inc_syscw(struct task_struct *tsk) +{ +} +#endif + +#endif /* _LINUX_SCHED_XACCT_H */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 5308304993be..94ad6eea9550 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -1,32 +1,13 @@ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H -#include <linux/list.h> #include <linux/bug.h> -#include <uapi/linux/signal.h> +#include <linux/signal_types.h> struct task_struct; /* for sysctl */ extern int print_fatal_signals; -/* - * Real Time signals may be queued. - */ - -struct sigqueue { - struct list_head list; - int flags; - siginfo_t info; - struct user_struct *user; -}; - -/* flags values. */ -#define SIGQUEUE_PREALLOC 1 - -struct sigpending { - struct list_head list; - sigset_t signal; -}; #ifndef HAVE_ARCH_COPY_SIGINFO @@ -272,42 +253,6 @@ extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct sigaction { -#ifndef __ARCH_HAS_IRIX_SIGACTION - __sighandler_t sa_handler; - unsigned long sa_flags; -#else - unsigned int sa_flags; - __sighandler_t sa_handler; -#endif -#ifdef __ARCH_HAS_SA_RESTORER - __sigrestore_t sa_restorer; -#endif - sigset_t sa_mask; /* mask last for extensibility */ -}; - -struct k_sigaction { - struct sigaction sa; -#ifdef __ARCH_HAS_KA_RESTORER - __sigrestore_t ka_restorer; -#endif -}; - -#ifdef CONFIG_OLD_SIGACTION -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - __sigrestore_t sa_restorer; -}; -#endif - -struct ksignal { - struct k_sigaction ka; - siginfo_t info; - int sig; -}; - extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h new file mode 100644 index 000000000000..16d862a3d8f3 --- /dev/null +++ b/include/linux/signal_types.h @@ -0,0 +1,66 @@ +#ifndef _LINUX_SIGNAL_TYPES_H +#define _LINUX_SIGNAL_TYPES_H + +/* + * Basic signal handling related data type definitions: + */ + +#include <linux/list.h> +#include <uapi/linux/signal.h> + +/* + * Real Time signals may be queued. + */ + +struct sigqueue { + struct list_head list; + int flags; + siginfo_t info; + struct user_struct *user; +}; + +/* flags values. */ +#define SIGQUEUE_PREALLOC 1 + +struct sigpending { + struct list_head list; + sigset_t signal; +}; + +struct sigaction { +#ifndef __ARCH_HAS_IRIX_SIGACTION + __sighandler_t sa_handler; + unsigned long sa_flags; +#else + unsigned int sa_flags; + __sighandler_t sa_handler; +#endif +#ifdef __ARCH_HAS_SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +#ifdef __ARCH_HAS_KA_RESTORER + __sigrestore_t ka_restorer; +#endif +}; + +#ifdef CONFIG_OLD_SIGACTION +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; +#endif + +struct ksignal { + struct k_sigaction ka; + siginfo_t info; + int sig; +}; + +#endif /* _LINUX_SIGNAL_TYPES_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c256..4985048640a7 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include <uapi/linux/signalfd.h> - +#include <linux/sched/signal.h> #ifdef CONFIG_SIGNALFD diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69ccd2636911..c776abd86937 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <net/flow_dissector.h> #include <linux/splice.h> #include <linux/in6.h> diff --git a/include/linux/stat.h b/include/linux/stat.h index 075cb0c7eb2a..c76e524fb34b 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -18,20 +18,32 @@ #include <linux/time.h> #include <linux/uidgid.h> +#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) + struct kstat { - u64 ino; - dev_t dev; + u32 result_mask; /* What fields the user got */ umode_t mode; unsigned int nlink; + uint32_t blksize; /* Preferred I/O size */ + u64 attributes; +#define KSTAT_ATTR_FS_IOC_FLAGS \ + (STATX_ATTR_COMPRESSED | \ + STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND | \ + STATX_ATTR_NODUMP | \ + STATX_ATTR_ENCRYPTED \ + )/* Attrs corresponding to FS_*_FL flags */ + u64 ino; + dev_t dev; + dev_t rdev; kuid_t uid; kgid_t gid; - dev_t rdev; loff_t size; - struct timespec atime; + struct timespec atime; struct timespec mtime; struct timespec ctime; - unsigned long blksize; - unsigned long long blocks; + struct timespec btime; /* File creation time */ + u64 blocks; }; #endif diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550af..11a7536c0fd2 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -10,6 +10,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include <linux/timer.h> +#include <linux/sched/signal.h> #include <linux/workqueue.h> #include <linux/sunrpc/debug.h> #include <linux/list.h> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91a740f6b884..980c3c9b06f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -48,6 +48,7 @@ struct stat; struct stat64; struct statfs; struct statfs64; +struct statx; struct __sysctl_args; struct sysinfo; struct timespec; @@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); #endif diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f..e2a5daf8d14f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include <linux/taskstats.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/slab.h> #ifdef CONFIG_TASKSTATS diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index d2e804e15c3e..b598cbc7b576 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -8,6 +8,10 @@ void timekeeping_init(void); extern int timekeeping_suspended; +/* Architecture timer tick functions: */ +extern void update_process_times(int user); +extern void xtime_update(unsigned long ticks); + /* * Get and set timeofday */ diff --git a/include/linux/timer.h b/include/linux/timer.h index c7bdf895179c..e6789b8757d5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -212,7 +212,7 @@ struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -#include <linux/sysctl.h> +struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 363e0e8082a9..32354b4b4b2b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,9 @@ #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/rwsem.h> +#include <linux/sysctl.h> #include <linux/err.h> #define UID_GID_MAP_MAX_EXTENTS 5 @@ -69,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0468548acebf..48a3483dccb1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -72,8 +71,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); -extern void userfaultfd_exit(struct mm_struct *mm); - #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -120,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, @@ -139,10 +136,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -static inline void userfaultfd_exit(struct mm_struct *mm) -{ -} - #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b..8355bab175e1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include <linux/virtio_byteorder.h> #include <uapi/linux/virtio_config.h> +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -56,6 +58,7 @@ * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -68,14 +71,15 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ @@ -169,7 +173,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h deleted file mode 100644 index c4b09689ab64..000000000000 --- a/include/linux/virtio_mmio.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Virtio platform device driver - * - * Copyright 2011, ARM Ltd. - * - * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007 - * - * This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _LINUX_VIRTIO_MMIO_H -#define _LINUX_VIRTIO_MMIO_H - -/* - * Control registers - */ - -/* Magic value ("virt" string) - Read Only */ -#define VIRTIO_MMIO_MAGIC_VALUE 0x000 - -/* Virtio device version - Read Only */ -#define VIRTIO_MMIO_VERSION 0x004 - -/* Virtio device ID - Read Only */ -#define VIRTIO_MMIO_DEVICE_ID 0x008 - -/* Virtio vendor ID - Read Only */ -#define VIRTIO_MMIO_VENDOR_ID 0x00c - -/* Bitmask of the features supported by the device (host) - * (32 bits per set) - Read Only */ -#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 - -/* Device (host) features set selector - Write Only */ -#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 - -/* Bitmask of features activated by the driver (guest) - * (32 bits per set) - Write Only */ -#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 - -/* Activated features set selector - Write Only */ -#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 - - -#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ - -/* Guest's memory page size in bytes - Write Only */ -#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 - -#endif - - -/* Queue selector - Write Only */ -#define VIRTIO_MMIO_QUEUE_SEL 0x030 - -/* Maximum size of the currently selected queue - Read Only */ -#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 - -/* Queue size for the currently selected queue - Write Only */ -#define VIRTIO_MMIO_QUEUE_NUM 0x038 - - -#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ - -/* Used Ring alignment for the currently selected queue - Write Only */ -#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c - -/* Guest's PFN for the currently selected queue - Read Write */ -#define VIRTIO_MMIO_QUEUE_PFN 0x040 - -#endif - - -/* Ready bit for the currently selected queue - Read Write */ -#define VIRTIO_MMIO_QUEUE_READY 0x044 - -/* Queue notifier - Write Only */ -#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 - -/* Interrupt status - Read Only */ -#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 - -/* Interrupt acknowledge - Write Only */ -#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 - -/* Device status register - Read Write */ -#define VIRTIO_MMIO_STATUS 0x070 - -/* Selected queue's Descriptor Table address, 64 bits in two halves */ -#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 -#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 - -/* Selected queue's Available Ring address, 64 bits in two halves */ -#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 -#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 - -/* Selected queue's Used Ring address, 64 bits in two halves */ -#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 -#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 - -/* Configuration atomicity value */ -#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc - -/* The config space is defined by each driver as - * the per-driver configuration space - Read Write */ -#define VIRTIO_MMIO_CONFIG 0x100 - - - -/* - * Interrupt flags (re: interrupt status & acknowledge registers) - */ - -#define VIRTIO_MMIO_INT_VRING (1 << 0) -#define VIRTIO_MMIO_INT_CONFIG (1 << 1) - -#endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6aa1b6cb5828..a80b7b59cf33 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + THP_SPLIT_PUD, +#endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h index c3fa0fd43949..1081db987391 100644 --- a/include/linux/vmacache.h +++ b/include/linux/vmacache.h @@ -12,7 +12,7 @@ static inline void vmacache_flush(struct task_struct *tsk) { - memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); + memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } extern void vmacache_flush_all(struct mm_struct *mm); diff --git a/include/linux/wait.h b/include/linux/wait.h index 1421132e9086..db076ca7f11d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -6,6 +6,7 @@ #include <linux/list.h> #include <linux/stddef.h> #include <linux/spinlock.h> + #include <asm/current.h> #include <uapi/linux/wait.h> @@ -619,30 +620,19 @@ do { \ __ret; \ }) +extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ - int __ret = 0; \ + int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ @@ -675,7 +665,7 @@ do { \ */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true @@ -702,7 +692,7 @@ do { \ */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true @@ -733,7 +723,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true @@ -764,7 +754,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ |