diff options
author | Vasiliy Kulikov <segoon@openwall.com> | 2011-07-27 01:08:48 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-27 01:49:44 +0200 |
commit | b34a6b1da371ed8af1221459a18c67970f7e3d53 (patch) | |
tree | 5addc850de13623b172395b9d0d7d670930fa6b3 /ipc | |
parent | ipc/mqueue.c: fix mq_open() return value (diff) | |
download | linux-b34a6b1da371ed8af1221459a18c67970f7e3d53.tar.xz linux-b34a6b1da371ed8af1221459a18c67970f7e3d53.zip |
ipc: introduce shm_rmid_forced sysctl
Add support for the shm_rmid_forced sysctl. If set to 1, all shared
memory objects in current ipc namespace will be automatically forced to
use IPC_RMID.
The POSIX way of handling shmem allows one to create shm objects and
call shmdt(), leaving shm object associated with no process, thus
consuming memory not counted via rlimits.
With shm_rmid_forced=1 the shared memory object is counted at least for
one process, so OOM killer may effectively kill the fat process holding
the shared memory.
It obviously breaks POSIX - some programs relying on the feature would
stop working. So set shm_rmid_forced=1 only if you're sure nobody uses
"orphaned" memory. Use shm_rmid_forced=0 by default for compatability
reasons.
The feature was previously impemented in -ow as a configure option.
[akpm@linux-foundation.org: fix documentation, per Randy]
[akpm@linux-foundation.org: fix warning]
[akpm@linux-foundation.org: readability/conventionality tweaks]
[akpm@linux-foundation.org: fix shm_rmid_forced/shm_forced_rmid confusion, use standard comment layout]
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Serge E. Hallyn" <serge.hallyn@canonical.com>
Cc: Daniel Lezcano <daniel.lezcano@free.fr>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Solar Designer <solar@openwall.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/ipc_sysctl.c | 36 | ||||
-rw-r--r-- | ipc/shm.c | 97 |
2 files changed, 129 insertions, 4 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 56410faa4550..00fba2bab87d 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; + memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } +static int proc_ipc_dointvec_minmax(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +} + +static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (err < 0) + return err; + if (ns->shm_rmid_forced) + shm_destroy_orphaned(ns); + return err; +} + static int proc_ipc_callback_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL +#define proc_ipc_dointvec_minmax NULL +#define proc_ipc_dointvec_minmax_orphans NULL #define proc_ipc_callback_dointvec NULL #define proc_ipcauto_dointvec_minmax NULL #endif @@ -155,6 +182,15 @@ static struct ctl_table ipc_kern_table[] = { .proc_handler = proc_ipc_dointvec, }, { + .procname = "shm_rmid_forced", + .data = &init_ipc_ns.shm_rmid_forced, + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), diff --git a/ipc/shm.c b/ipc/shm.c index 27884adb1a90..3f5b14365f33 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns) ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; + ns->shm_rmid_forced = 0; ns->shm_tot = 0; ipc_init_ids(&shm_ids(ns)); } @@ -187,6 +188,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) } /* + * shm_may_destroy - identifies whether shm segment should be destroyed now + * + * Returns true if and only if there are no active users of the segment and + * one of the following is true: + * + * 1) shmctl(id, IPC_RMID, NULL) was called for this shp + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + return (shp->shm_nattch == 0) && + (ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); +} + +/* * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list @@ -206,11 +224,83 @@ static void shm_close(struct vm_area_struct *vma) shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + up_write(&shm_ids(ns).rw_mutex); +} + +static int shm_try_destroy_current(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct shmid_kernel *shp = shm_lock(ns, id); + + if (IS_ERR(shp)) + return 0; + + if (shp->shm_cprid != task_tgid_vnr(current)) { + shm_unlock(shp); + return 0; + } + + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + return 0; +} + +static int shm_try_destroy_orphaned(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct shmid_kernel *shp = shm_lock(ns, id); + struct task_struct *task; + + if (IS_ERR(shp)) + return 0; + + /* + * We want to destroy segments without users and with already + * exit'ed originating process. + * + * XXX: the originating process may exist in another pid namespace. + */ + task = find_task_by_vpid(shp->shm_cprid); + if (task != NULL) { + shm_unlock(shp); + return 0; + } + + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); + return 0; +} + +void shm_destroy_orphaned(struct ipc_namespace *ns) +{ + down_write(&shm_ids(ns).rw_mutex); + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + up_write(&shm_ids(ns).rw_mutex); +} + + +void exit_shm(struct task_struct *task) +{ + struct nsproxy *nsp = task->nsproxy; + struct ipc_namespace *ns; + + if (!nsp) + return; + ns = nsp->ipc_ns; + if (!ns || !ns->shm_rmid_forced) + return; + + /* Destroy all already created segments, but not mapped yet */ + down_write(&shm_ids(ns).rw_mutex); + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -950,8 +1040,7 @@ out_nattch: shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); |