summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDipankar Sarma <dipankar@in.ibm.com>2005-09-09 22:04:13 +0200
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 22:57:55 +0200
commitab2af1f5005069321c5d130f09cce577b03f43ef (patch)
tree73a70ba486f522cd9eeeef376ede2b5a1c1b473b /kernel
parent[PATCH] files-sparc64-fix 2 (diff)
downloadlinux-ab2af1f5005069321c5d130f09cce577b03f43ef.tar.xz
linux-ab2af1f5005069321c5d130f09cce577b03f43ef.zip
[PATCH] files: files struct with RCU
Patch to eliminate the struct files_struct.file_lock spinlock on the reader side and use the RCU refcounting rcuref_xxx API for the f_count refcounter. The updates to the fdtable are done by allocating a new fdtable structure and setting files->fdt to point to the new structure. The fdtable structure is protected by RCU, thereby allowing lock-free lookup. For fd arrays/sets that are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A global list of fdtables to be freed is not scalable, so we use a per-cpu list. If keventd is already handling the current cpu's work, we use a timer to defer queueing of that work. Since the last publication, this patch has been re-written to avoid using explicit memory barriers and to use the rcu_assign_pointer() and rcu_dereference() primitives instead. This required that the fd information be kept in a separate structure (fdtable) and updated atomically. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c15
-rw-r--r--kernel/fork.c23
2 files changed, 25 insertions, 13 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 83beb1e93b18..6d2089a1bce7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -411,15 +411,16 @@ void fastcall put_files_struct(struct files_struct *files)
close_files(files);
/*
* Free the fd and fdset arrays if we expanded them.
+ * If the fdtable was embedded, pass files for freeing
+ * at the end of the RCU grace period. Otherwise,
+ * you can free files immediately.
*/
fdt = files_fdtable(files);
- if (fdt->fd != &files->fd_array[0])
- free_fd_array(fdt->fd, fdt->max_fds);
- if (fdt->max_fdset > __FD_SETSIZE) {
- free_fdset(fdt->open_fds, fdt->max_fdset);
- free_fdset(fdt->close_on_exec, fdt->max_fdset);
- }
- kmem_cache_free(files_cachep, files);
+ if (fdt == &files->fdtab)
+ fdt->free_files = files;
+ else
+ kmem_cache_free(files_cachep, files);
+ free_fdtable(fdt);
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index ecc694debb50..8149f3602881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -35,6 +35,7 @@
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
+#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
@@ -565,13 +566,12 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
return 0;
}
-static int count_open_files(struct files_struct *files, int size)
+static int count_open_files(struct fdtable *fdt)
{
+ int size = fdt->max_fdset;
int i;
- struct fdtable *fdt;
/* Find the last open fd */
- fdt = files_fdtable(files);
for (i = size/(8*sizeof(long)); i > 0; ) {
if (fdt->open_fds->fds_bits[--i])
break;
@@ -592,13 +592,17 @@ static struct files_struct *alloc_files(void)
atomic_set(&newf->count, 1);
spin_lock_init(&newf->file_lock);
- fdt = files_fdtable(newf);
+ fdt = &newf->fdtab;
fdt->next_fd = 0;
fdt->max_fds = NR_OPEN_DEFAULT;
fdt->max_fdset = __FD_SETSIZE;
fdt->close_on_exec = &newf->close_on_exec_init;
fdt->open_fds = &newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
+ INIT_RCU_HEAD(&fdt->rcu);
+ fdt->free_files = NULL;
+ fdt->next = NULL;
+ rcu_assign_pointer(newf->fdt, fdt);
out:
return newf;
}
@@ -637,7 +641,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
old_fdt = files_fdtable(oldf);
new_fdt = files_fdtable(newf);
size = old_fdt->max_fdset;
- open_files = count_open_files(oldf, old_fdt->max_fdset);
+ open_files = count_open_files(old_fdt);
expand = 0;
/*
@@ -661,7 +665,14 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
spin_unlock(&newf->file_lock);
if (error < 0)
goto out_release;
+ new_fdt = files_fdtable(newf);
+ /*
+ * Reacquire the oldf lock and a pointer to its fd table
+ * who knows it may have a new bigger fd table. We need
+ * the latest pointer.
+ */
spin_lock(&oldf->file_lock);
+ old_fdt = files_fdtable(oldf);
}
old_fds = old_fdt->fd;
@@ -683,7 +694,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
*/
FD_CLR(open_files - i, new_fdt->open_fds);
}
- *new_fds++ = f;
+ rcu_assign_pointer(*new_fds++, f);
}
spin_unlock(&oldf->file_lock);