diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/ipc_sysctl.c | 36 | ||||
-rw-r--r-- | ipc/mqueue.c | 129 | ||||
-rw-r--r-- | ipc/sem.c | 79 | ||||
-rw-r--r-- | ipc/shm.c | 121 | ||||
-rw-r--r-- | ipc/util.c | 16 |
5 files changed, 285 insertions, 96 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 56410faa4550..00fba2bab87d 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; + memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } +static int proc_ipc_dointvec_minmax(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +} + +static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (err < 0) + return err; + if (ns->shm_rmid_forced) + shm_destroy_orphaned(ns); + return err; +} + static int proc_ipc_callback_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL +#define proc_ipc_dointvec_minmax NULL +#define proc_ipc_dointvec_minmax_orphans NULL #define proc_ipc_callback_dointvec NULL #define proc_ipcauto_dointvec_minmax NULL #endif @@ -155,6 +182,15 @@ static struct ctl_table ipc_kern_table[] = { .proc_handler = proc_ipc_dointvec, }, { + .procname = "shm_rmid_forced", + .data = &init_ipc_ns.shm_rmid_forced, + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 14fb6d67e6a3..2e0ecfcc881d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -113,72 +113,75 @@ static struct inode *mqueue_get_inode(struct super_block *sb, { struct user_struct *u = current_user(); struct inode *inode; + int ret = -ENOMEM; inode = new_inode(sb); - if (inode) { - inode->i_ino = get_next_ino(); - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_mtime = inode->i_ctime = inode->i_atime = - CURRENT_TIME; + if (!inode) + goto err; - if (S_ISREG(mode)) { - struct mqueue_inode_info *info; - struct task_struct *p = current; - unsigned long mq_bytes, mq_msg_tblsz; - - inode->i_fop = &mqueue_file_operations; - inode->i_size = FILENT_SIZE; - /* mqueue specific info */ - info = MQUEUE_I(inode); - spin_lock_init(&info->lock); - init_waitqueue_head(&info->wait_q); - INIT_LIST_HEAD(&info->e_wait_q[0].list); - INIT_LIST_HEAD(&info->e_wait_q[1].list); - info->notify_owner = NULL; - info->qsize = 0; - info->user = NULL; /* set when all is ok */ - memset(&info->attr, 0, sizeof(info->attr)); - info->attr.mq_maxmsg = ipc_ns->mq_msg_max; - info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; - if (attr) { - info->attr.mq_maxmsg = attr->mq_maxmsg; - info->attr.mq_msgsize = attr->mq_msgsize; - } - mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); - info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); - if (!info->messages) - goto out_inode; - - mq_bytes = (mq_msg_tblsz + - (info->attr.mq_maxmsg * info->attr.mq_msgsize)); - - spin_lock(&mq_lock); - if (u->mq_bytes + mq_bytes < u->mq_bytes || - u->mq_bytes + mq_bytes > - task_rlimit(p, RLIMIT_MSGQUEUE)) { - spin_unlock(&mq_lock); - /* mqueue_evict_inode() releases info->messages */ - goto out_inode; - } - u->mq_bytes += mq_bytes; - spin_unlock(&mq_lock); + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME; + + if (S_ISREG(mode)) { + struct mqueue_inode_info *info; + struct task_struct *p = current; + unsigned long mq_bytes, mq_msg_tblsz; + + inode->i_fop = &mqueue_file_operations; + inode->i_size = FILENT_SIZE; + /* mqueue specific info */ + info = MQUEUE_I(inode); + spin_lock_init(&info->lock); + init_waitqueue_head(&info->wait_q); + INIT_LIST_HEAD(&info->e_wait_q[0].list); + INIT_LIST_HEAD(&info->e_wait_q[1].list); + info->notify_owner = NULL; + info->qsize = 0; + info->user = NULL; /* set when all is ok */ + memset(&info->attr, 0, sizeof(info->attr)); + info->attr.mq_maxmsg = ipc_ns->mq_msg_max; + info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; + if (attr) { + info->attr.mq_maxmsg = attr->mq_maxmsg; + info->attr.mq_msgsize = attr->mq_msgsize; + } + mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); + info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); + if (!info->messages) + goto out_inode; - /* all is ok */ - info->user = get_uid(u); - } else if (S_ISDIR(mode)) { - inc_nlink(inode); - /* Some things misbehave if size == 0 on a directory */ - inode->i_size = 2 * DIRENT_SIZE; - inode->i_op = &mqueue_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + mq_bytes = (mq_msg_tblsz + + (info->attr.mq_maxmsg * info->attr.mq_msgsize)); + + spin_lock(&mq_lock); + if (u->mq_bytes + mq_bytes < u->mq_bytes || + u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) { + spin_unlock(&mq_lock); + /* mqueue_evict_inode() releases info->messages */ + ret = -EMFILE; + goto out_inode; } + u->mq_bytes += mq_bytes; + spin_unlock(&mq_lock); + + /* all is ok */ + info->user = get_uid(u); + } else if (S_ISDIR(mode)) { + inc_nlink(inode); + /* Some things misbehave if size == 0 on a directory */ + inode->i_size = 2 * DIRENT_SIZE; + inode->i_op = &mqueue_dir_inode_operations; + inode->i_fop = &simple_dir_operations; } + return inode; out_inode: iput(inode); - return NULL; +err: + return ERR_PTR(ret); } static int mqueue_fill_super(struct super_block *sb, void *data, int silent) @@ -194,8 +197,8 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); - if (!inode) { - error = -ENOMEM; + if (IS_ERR(inode)) { + error = PTR_ERR(inode); goto out; } @@ -315,8 +318,8 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, spin_unlock(&mq_lock); inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr); - if (!inode) { - error = -ENOMEM; + if (IS_ERR(inode)) { + error = PTR_ERR(inode); spin_lock(&mq_lock); ipc_ns->mq_queues_count--; goto out_unlock; @@ -446,8 +449,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&info->lock); - time = schedule_hrtimeout_range_clock(timeout, - HRTIMER_MODE_ABS, 0, CLOCK_REALTIME); + time = schedule_hrtimeout_range_clock(timeout, 0, + HRTIMER_MODE_ABS, CLOCK_REALTIME); while (ewp->state == STATE_PENDING) cpu_relax(); diff --git a/ipc/sem.c b/ipc/sem.c index 34193ed69fbe..5215a81420df 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -90,6 +90,52 @@ #include <asm/uaccess.h> #include "util.h" +/* One semaphore structure for each semaphore in the system. */ +struct sem { + int semval; /* current value */ + int sempid; /* pid of last operation */ + struct list_head sem_pending; /* pending single-sop operations */ +}; + +/* One queue for each sleeping process in the system. */ +struct sem_queue { + struct list_head simple_list; /* queue of pending operations */ + struct list_head list; /* queue of pending operations */ + struct task_struct *sleeper; /* this process */ + struct sem_undo *undo; /* undo structure */ + int pid; /* process id of requesting process */ + int status; /* completion status of operation */ + struct sembuf *sops; /* array of pending operations */ + int nsops; /* number of operations */ + int alter; /* does *sops alter the array? */ +}; + +/* Each task has a list of undo requests. They are executed automatically + * when the process exits. + */ +struct sem_undo { + struct list_head list_proc; /* per-process list: * + * all undos from one process + * rcu protected */ + struct rcu_head rcu; /* rcu struct for sem_undo */ + struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ + struct list_head list_id; /* per semaphore array list: + * all undos for one array */ + int semid; /* semaphore set identifier */ + short *semadj; /* array of adjustments */ + /* one per semaphore */ +}; + +/* sem_undo_list controls shared access to the list of sem_undo structures + * that may be shared among all a CLONE_SYSVSEM task group. + */ +struct sem_undo_list { + atomic_t refcnt; + spinlock_t lock; + struct list_head list_proc; +}; + + #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) @@ -689,12 +735,6 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } -static void free_un(struct rcu_head *head) -{ - struct sem_undo *un = container_of(head, struct sem_undo, rcu); - kfree(un); -} - /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex * remains locked on exit. @@ -714,7 +754,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) un->semid = -1; list_del_rcu(&un->list_proc); spin_unlock(&un->ulp->lock); - call_rcu(&un->rcu, free_un); + kfree_rcu(un, rcu); } /* Wake up all pending processes and let them fail with EIDRM. */ @@ -1432,6 +1472,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, queue.status = -EINTR; queue.sleeper = current; + +sleep_again: current->state = TASK_INTERRUPTIBLE; sem_unlock(sma); @@ -1456,15 +1498,23 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, } sma = sem_lock(ns, semid); + + /* + * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. + */ + error = get_queue_result(&queue); + + /* + * Array removed? If yes, leave without sem_unlock(). + */ if (IS_ERR(sma)) { - error = -EIDRM; goto out_free; } - error = get_queue_result(&queue); /* - * If queue.status != -EINTR we are woken up by another process + * If queue.status != -EINTR we are woken up by another process. + * Leave without unlink_queue(), but with sem_unlock(). */ if (error != -EINTR) { @@ -1476,6 +1526,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, */ if (timeout && jiffies_left == 0) error = -EAGAIN; + + /* + * If the wakeup was spurious, just retry + */ + if (error == -EINTR && !signal_pending(current)) + goto sleep_again; + unlink_queue(sma, &queue); out_unlock_free: @@ -1612,7 +1669,7 @@ void exit_sem(struct task_struct *tsk) sem_unlock(sma); wake_up_sem_queue_do(&tasks); - call_rcu(&un->rcu, free_un); + kfree_rcu(un, rcu); } kfree(ulp); } diff --git a/ipc/shm.c b/ipc/shm.c index ab3385a21b27..02ecf2c078fc 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns) ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; + ns->shm_rmid_forced = 0; ns->shm_tot = 0; ipc_init_ids(&shm_ids(ns)); } @@ -104,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns) } #endif -void __init shm_init (void) +static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); + return 0; +} + +pure_initcall(ipc_ns_init); + +void __init shm_init (void) +{ ipc_init_proc_interface("sysvipc/shm", #if BITS_PER_LONG <= 32 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", @@ -130,6 +138,12 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) return container_of(ipcp, struct shmid_kernel, shm_perm); } +static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) +{ + rcu_read_lock(); + spin_lock(&ipcp->shm_perm.lock); +} + static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, int id) { @@ -187,6 +201,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) } /* + * shm_may_destroy - identifies whether shm segment should be destroyed now + * + * Returns true if and only if there are no active users of the segment and + * one of the following is true: + * + * 1) shmctl(id, IPC_RMID, NULL) was called for this shp + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + return (shp->shm_nattch == 0) && + (ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); +} + +/* * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list @@ -206,14 +237,90 @@ static void shm_close(struct vm_area_struct *vma) shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); up_write(&shm_ids(ns).rw_mutex); } +/* Called with ns->shm_ids(ns).rw_mutex locked */ +static int shm_try_destroy_current(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct kern_ipc_perm *ipcp = p; + struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + if (shp->shm_creator != current) + return 0; + + /* + * Mark it as orphaned to destroy the segment when + * kernel.shm_rmid_forced is changed. + * It is noop if the following shm_may_destroy() returns true. + */ + shp->shm_creator = NULL; + + /* + * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID + * is not set, it shouldn't be deleted here. + */ + if (!ns->shm_rmid_forced) + return 0; + + if (shm_may_destroy(ns, shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } + return 0; +} + +/* Called with ns->shm_ids(ns).rw_mutex locked */ +static int shm_try_destroy_orphaned(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct kern_ipc_perm *ipcp = p; + struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + /* + * We want to destroy segments without users and with already + * exit'ed originating process. + * + * As shp->* are changed under rw_mutex, it's safe to skip shp locking. + */ + if (shp->shm_creator != NULL) + return 0; + + if (shm_may_destroy(ns, shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } + return 0; +} + +void shm_destroy_orphaned(struct ipc_namespace *ns) +{ + down_write(&shm_ids(ns).rw_mutex); + if (shm_ids(ns).in_use) + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + up_write(&shm_ids(ns).rw_mutex); +} + + +void exit_shm(struct task_struct *task) +{ + struct ipc_namespace *ns = task->nsproxy->ipc_ns; + + if (shm_ids(ns).in_use == 0) + return; + + /* Destroy all already created segments, but not mapped yet */ + down_write(&shm_ids(ns).rw_mutex); + if (shm_ids(ns).in_use) + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); + up_write(&shm_ids(ns).rw_mutex); +} + static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct file *file = vma->vm_file; @@ -277,13 +384,13 @@ static int shm_release(struct inode *ino, struct file *file) return 0; } -static int shm_fsync(struct file *file, int datasync) +static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct shm_file_data *sfd = shm_file_data(file); if (!sfd->file->f_op->fsync) return -EINVAL; - return sfd->file->f_op->fsync(sfd->file, datasync); + return sfd->file->f_op->fsync(sfd->file, start, end, datasync); } static unsigned long shm_get_unmapped_area(struct file *file, @@ -404,6 +511,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_segsz = size; shp->shm_nattch = 0; shp->shm_file = file; + shp->shm_creator = current; /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. @@ -950,8 +1058,7 @@ out_nattch: shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); diff --git a/ipc/util.c b/ipc/util.c index 5c0d28921ba8..75261a31d48d 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -579,19 +579,6 @@ static void ipc_schedule_free(struct rcu_head *head) schedule_work(&sched->work); } -/** - * ipc_immediate_free - free ipc + rcu space - * @head: RCU callback structure that contains pointer to be freed - * - * Free from the RCU callback context. - */ -static void ipc_immediate_free(struct rcu_head *head) -{ - struct ipc_rcu_grace *free = - container_of(head, struct ipc_rcu_grace, rcu); - kfree(free); -} - void ipc_rcu_putref(void *ptr) { if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) @@ -601,8 +588,7 @@ void ipc_rcu_putref(void *ptr) call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, ipc_schedule_free); } else { - call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, - ipc_immediate_free); + kfree_rcu(container_of(ptr, struct ipc_rcu_grace, data), rcu); } } |