diff options
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r-- | include/linux/sched.h | 189 |
1 files changed, 144 insertions, 45 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c4f3e9b9bc5..81a173c0897d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -63,7 +63,6 @@ struct sched_param { #include <linux/nodemask.h> #include <linux/mm_types.h> -#include <asm/system.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/cputime.h> @@ -220,7 +219,7 @@ extern char ___assert_task_state[1 - 2*!!( ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FREEZING) == 0) + (task->flags & PF_FROZEN) == 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -273,9 +272,11 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern void select_nohz_load_balancer(int stop_tick); +extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void select_nohz_load_balancer(int stop_tick) { } +static inline void set_cpu_sd_state_idle(void) { } #endif /* @@ -359,6 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); +extern void schedule_preempt_disabled(void); extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; @@ -483,8 +485,8 @@ struct task_cputime { #define INIT_CPUTIME \ (struct task_cputime) { \ - .utime = cputime_zero, \ - .stime = cputime_zero, \ + .utime = 0, \ + .stime = 0, \ .sum_exec_runtime = 0, \ } @@ -550,6 +552,18 @@ struct signal_struct { int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + /* POSIX.1b Interval Timers */ struct list_head posix_timers; @@ -635,13 +649,15 @@ struct signal_struct { #endif #ifdef CONFIG_CGROUPS /* - * The threadgroup_fork_lock prevents threads from forking with - * CLONE_THREAD while held for writing. Use this for fork-sensitive - * threadgroup-wide operations. It's taken for reading in fork.c in - * copy_process(). - * Currently only needed write-side by cgroups. + * group_rwsem prevents new tasks from entering the threadgroup and + * member tasks from exiting,a more specifically, setting of + * PF_EXITING. fork and exit paths are protected with this rwsem + * using threadgroup_change_begin/end(). Users which require + * threadgroup to remain stable should use threadgroup_[un]lock() + * which also takes care of exec path. Currently, cgroup is the + * only user. */ - struct rw_semaphore threadgroup_fork_lock; + struct rw_semaphore group_rwsem; #endif int oom_adj; /* OOM kill score adjustment (bit shift) */ @@ -901,6 +917,11 @@ struct sched_group_power { * single CPU. */ unsigned int power, power_orig; + unsigned long next_update; + /* + * Number of busy cpus in this group. + */ + atomic_t nr_busy_cpus; }; struct sched_group { @@ -925,6 +946,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/** + * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. + * @group: The group whose first cpu is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_cpus(group)); +} + struct sched_domain_attr { int relax_domain_level; }; @@ -1035,6 +1065,8 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag) unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); +bool cpus_share_cache(int this_cpu, int that_cpu); + #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -1044,6 +1076,12 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { } + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + #endif /* !CONFIG_SMP */ @@ -1208,6 +1246,12 @@ struct sched_rt_entity { #endif }; +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + struct rcu_node; enum perf_event_task_context { @@ -1302,6 +1346,11 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; +#ifdef CONFIG_GENERIC_HARDIRQS + /* IRQ handler threads */ + unsigned irq_thread:1; +#endif + pid_t pid; pid_t tgid; @@ -1315,8 +1364,8 @@ struct task_struct { * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process */ - struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ + struct task_struct __rcu *real_parent; /* real parent process */ + struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ /* * children/sibling forms the list of my natural children */ @@ -1410,11 +1459,6 @@ struct task_struct { * mempolicy */ spinlock_t alloc_lock; -#ifdef CONFIG_GENERIC_HARDIRQS - /* IRQ handler threads */ - struct irqaction *irqaction; -#endif - /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; @@ -1481,7 +1525,7 @@ struct task_struct { #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; /* Protected by alloc_lock */ - int mems_allowed_change_disable; + seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; #endif @@ -1527,6 +1571,7 @@ struct task_struct { */ int nr_dirtied; int nr_dirtied_pause; + unsigned long dirty_paused_when; /* start of a write-and-pause period */ #ifdef CONFIG_LATENCYTOP int latency_record_count; @@ -1759,7 +1804,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ @@ -1772,7 +1816,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ @@ -1788,7 +1831,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -1848,8 +1890,7 @@ extern void task_clear_jobctl_pending(struct task_struct *task, #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ static inline void rcu_copy_process(struct task_struct *p) { @@ -2033,7 +2074,7 @@ extern void sched_autogroup_fork(struct signal_struct *sig); extern void sched_autogroup_exit(struct signal_struct *sig); #ifdef CONFIG_PROC_FS extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); #endif #else static inline void sched_autogroup_create_attach(struct task_struct *p) { } @@ -2050,12 +2091,20 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice; extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); extern void rt_mutex_adjust_pi(struct task_struct *p); +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return tsk->pi_blocked_on != NULL; +} #else static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } # define rt_mutex_adjust_pi(p) do { } while (0) +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return false; +} #endif extern bool yield_to(struct task_struct *p, bool preempt); @@ -2070,6 +2119,14 @@ extern int sched_setscheduler(struct task_struct *, int, extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern struct task_struct *idle_task(int cpu); +/** + * is_idle_task - is the specified task an idle task? + * @p: the task in question. + */ +static inline bool is_idle_task(const struct task_struct *p) +{ + return p->pid == 0; +} extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); @@ -2235,6 +2292,12 @@ static inline void mmdrop(struct mm_struct * mm) extern void mmput(struct mm_struct *); /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ @@ -2251,7 +2314,7 @@ extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); extern void flush_itimer_signals(void); -extern NORET_TYPE void do_group_exit(int); +extern void do_group_exit(int); extern void daemonize(const char *, ...); extern int allow_signal(int); @@ -2341,7 +2404,7 @@ static inline int thread_group_empty(struct task_struct *p) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. + * ->cgroup.subsys[]. And ->vfork_done. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), @@ -2360,12 +2423,15 @@ static inline void task_unlock(struct task_struct *p) extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); -#define lock_task_sighand(tsk, flags) \ -({ struct sighand_struct *__ss; \ - __cond_lock(&(tsk)->sighand->siglock, \ - (__ss = __lock_task_sighand(tsk, flags))); \ - __ss; \ -}) \ +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) @@ -2373,29 +2439,62 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } -/* See the declaration of threadgroup_fork_lock in signal_struct. */ #ifdef CONFIG_CGROUPS -static inline void threadgroup_fork_read_lock(struct task_struct *tsk) +static inline void threadgroup_change_begin(struct task_struct *tsk) { - down_read(&tsk->signal->threadgroup_fork_lock); + down_read(&tsk->signal->group_rwsem); } -static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) +static inline void threadgroup_change_end(struct task_struct *tsk) { - up_read(&tsk->signal->threadgroup_fork_lock); + up_read(&tsk->signal->group_rwsem); } -static inline void threadgroup_fork_write_lock(struct task_struct *tsk) + +/** + * threadgroup_lock - lock threadgroup + * @tsk: member task of the threadgroup to lock + * + * Lock the threadgroup @tsk belongs to. No new task is allowed to enter + * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or + * perform exec. This is useful for cases where the threadgroup needs to + * stay stable across blockable operations. + * + * fork and exit paths explicitly call threadgroup_change_{begin|end}() for + * synchronization. While held, no new task will be added to threadgroup + * and no existing live task will have its PF_EXITING set. + * + * During exec, a task goes and puts its thread group through unusual + * changes. After de-threading, exclusive access is assumed to resources + * which are usually shared by tasks in the same group - e.g. sighand may + * be replaced with a new one. Also, the exec'ing task takes over group + * leader role including its pid. Exclude these changes while locked by + * grabbing cred_guard_mutex which is used to synchronize exec path. + */ +static inline void threadgroup_lock(struct task_struct *tsk) { - down_write(&tsk->signal->threadgroup_fork_lock); + /* + * exec uses exit for de-threading nesting group_rwsem inside + * cred_guard_mutex. Grab cred_guard_mutex first. + */ + mutex_lock(&tsk->signal->cred_guard_mutex); + down_write(&tsk->signal->group_rwsem); } -static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) + +/** + * threadgroup_unlock - unlock threadgroup + * @tsk: member task of the threadgroup to unlock + * + * Reverse threadgroup_lock(). + */ +static inline void threadgroup_unlock(struct task_struct *tsk) { - up_write(&tsk->signal->threadgroup_fork_lock); + up_write(&tsk->signal->group_rwsem); + mutex_unlock(&tsk->signal->cred_guard_mutex); } #else -static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} -static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} -static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} -static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} +static inline void threadgroup_change_begin(struct task_struct *tsk) {} +static inline void threadgroup_change_end(struct task_struct *tsk) {} +static inline void threadgroup_lock(struct task_struct *tsk) {} +static inline void threadgroup_unlock(struct task_struct *tsk) {} #endif #ifndef __HAVE_THREAD_FUNCTIONS |