From 938bb9f5e840eddbf54e4f62f6c5ba9b3ae12c9d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 14 Jan 2009 14:14:30 +0100
Subject: [CVE-2009-0029] System call wrappers part 28

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 fs/notify/inotify/inotify_user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/notify/inotify/inotify_user.c')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 81b8644b0136..efef1ffca77b 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -576,7 +576,7 @@ static const struct inotify_operations inotify_user_ops = {
 	.destroy_watch	= free_inotify_user_watch,
 };
 
-asmlinkage long sys_inotify_init1(int flags)
+SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
 	struct inotify_device *dev;
 	struct inotify_handle *ih;
@@ -655,7 +655,7 @@ out_put_fd:
 	return ret;
 }
 
-asmlinkage long sys_inotify_init(void)
+SYSCALL_DEFINE0(inotify_init)
 {
 	return sys_inotify_init1(0);
 }
-- 
cgit v1.2.3


From 2e4d0924eb0c403ce4014fa139d1d61bf2c44fee Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 14 Jan 2009 14:14:31 +0100
Subject: [CVE-2009-0029] System call wrappers part 29

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 fs/namei.c                       | 21 ++++++++++-----------
 fs/notify/inotify/inotify_user.c |  5 +++--
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs/notify/inotify/inotify_user.c')

diff --git a/fs/namei.c b/fs/namei.c
index 90520f05f997..bbc15c237558 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1962,8 +1962,8 @@ static int may_mknod(mode_t mode)
 	}
 }
 
-asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
-				unsigned dev)
+SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
+		unsigned, dev)
 {
 	int error;
 	char *tmp;
@@ -2044,7 +2044,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return error;
 }
 
-asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
+SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
 {
 	int error = 0;
 	char * tmp;
@@ -2291,7 +2291,7 @@ slashes:
 	goto exit2;
 }
 
-asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag)
+SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
 {
 	if ((flag & ~AT_REMOVEDIR) != 0)
 		return -EINVAL;
@@ -2328,8 +2328,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 	return error;
 }
 
-asmlinkage long sys_symlinkat(const char __user *oldname,
-			      int newdfd, const char __user *newname)
+SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
+		int, newdfd, const char __user *, newname)
 {
 	int error;
 	char *from;
@@ -2422,9 +2422,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
  * with linux 2.0, and to avoid hard-linking to directories
  * and other special files.  --ADM
  */
-asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
-			   int newdfd, const char __user *newname,
-			   int flags)
+SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
+		int, newdfd, const char __user *, newname, int, flags)
 {
 	struct dentry *new_dentry;
 	struct nameidata nd;
@@ -2624,8 +2623,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
-asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
-			     int newdfd, const char __user *newname)
+SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
+		int, newdfd, const char __user *, newname)
 {
 	struct dentry *old_dir, *new_dir;
 	struct dentry *old_dentry, *new_dentry;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index efef1ffca77b..d53a1838d6e8 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -660,7 +660,8 @@ SYSCALL_DEFINE0(inotify_init)
 	return sys_inotify_init1(0);
 }
 
-asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
+SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
+		u32, mask)
 {
 	struct inode *inode;
 	struct inotify_device *dev;
@@ -704,7 +705,7 @@ fput_and_out:
 	return ret;
 }
 
-asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd)
+SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
 	struct file *filp;
 	struct inotify_device *dev;
-- 
cgit v1.2.3


From 3632dee2f8b8a9720329f29eeaa4ec4669a3aff8 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Thu, 22 Jan 2009 15:29:45 +0100
Subject: inotify: clean up inotify_read and fix locking problems

If userspace supplies an invalid pointer to a read() of an inotify
instance, the inotify device's event list mutex is unlocked twice.
This causes an unbalance which effectively leaves the data structure
unprotected, and we can trigger oopses by accessing the inotify
instance from different tasks concurrently.

The best fix (contributed largely by Linus) is a total rewrite
of the function in question:

On Thu, Jan 22, 2009 at 7:05 AM, Linus Torvalds wrote:
> The thing to notice is that:
>
>  - locking is done in just one place, and there is no question about it
>   not having an unlock.
>
>  - that whole double-while(1)-loop thing is gone.
>
>  - use multiple functions to make nesting and error handling sane
>
>  - do error testing after doing the things you always need to do, ie do
>   this:
>
>        mutex_lock(..)
>        ret = function_call();
>        mutex_unlock(..)
>
>        .. test ret here ..
>
>   instead of doing conditional exits with unlocking or freeing.
>
> So if the code is written in this way, it may still be buggy, but at least
> it's not buggy because of subtle "forgot to unlock" or "forgot to free"
> issues.
>
> This _always_ unlocks if it locked, and it always frees if it got a
> non-error kevent.

Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Robert Love <rlove@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/inotify/inotify_user.c | 135 +++++++++++++++++++++------------------
 1 file changed, 74 insertions(+), 61 deletions(-)

(limited to 'fs/notify/inotify/inotify_user.c')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index d53a1838d6e8..bed766e435b5 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -427,10 +427,61 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
 	return ret;
 }
 
+/*
+ * Get an inotify_kernel_event if one exists and is small
+ * enough to fit in "count". Return an error pointer if
+ * not large enough.
+ *
+ * Called with the device ev_mutex held.
+ */
+static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
+						  size_t count)
+{
+	size_t event_size = sizeof(struct inotify_event);
+	struct inotify_kernel_event *kevent;
+
+	if (list_empty(&dev->events))
+		return NULL;
+
+	kevent = inotify_dev_get_event(dev);
+	if (kevent->name)
+		event_size += kevent->event.len;
+
+	if (event_size > count)
+		return ERR_PTR(-EINVAL);
+
+	remove_kevent(dev, kevent);
+	return kevent;
+}
+
+/*
+ * Copy an event to user space, returning how much we copied.
+ *
+ * We already checked that the event size is smaller than the
+ * buffer we had in "get_one_event()" above.
+ */
+static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent,
+				  char __user *buf)
+{
+	size_t event_size = sizeof(struct inotify_event);
+
+	if (copy_to_user(buf, &kevent->event, event_size))
+		return -EFAULT;
+
+	if (kevent->name) {
+		buf += event_size;
+
+		if (copy_to_user(buf, kevent->name, kevent->event.len))
+			return -EFAULT;
+
+		event_size += kevent->event.len;
+	}
+	return event_size;
+}
+
 static ssize_t inotify_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *pos)
 {
-	size_t event_size = sizeof (struct inotify_event);
 	struct inotify_device *dev;
 	char __user *start;
 	int ret;
@@ -440,81 +491,43 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	dev = file->private_data;
 
 	while (1) {
+		struct inotify_kernel_event *kevent;
 
 		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
 
 		mutex_lock(&dev->ev_mutex);
-		if (!list_empty(&dev->events)) {
-			ret = 0;
-			break;
-		}
+		kevent = get_one_event(dev, count);
 		mutex_unlock(&dev->ev_mutex);
 
-		if (file->f_flags & O_NONBLOCK) {
-			ret = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
+		if (kevent) {
+			ret = PTR_ERR(kevent);
+			if (IS_ERR(kevent))
+				break;
+			ret = copy_event_to_user(kevent, buf);
+			free_kevent(kevent);
+			if (ret < 0)
+				break;
+			buf += ret;
+			count -= ret;
+			continue;
 		}
 
-		schedule();
-	}
-
-	finish_wait(&dev->wq, &wait);
-	if (ret)
-		return ret;
-
-	while (1) {
-		struct inotify_kernel_event *kevent;
-
-		ret = buf - start;
-		if (list_empty(&dev->events))
+		ret = -EAGAIN;
+		if (file->f_flags & O_NONBLOCK)
 			break;
-
-		kevent = inotify_dev_get_event(dev);
-		if (event_size + kevent->event.len > count) {
-			if (ret == 0 && count > 0) {
-				/*
-				 * could not get a single event because we
-				 * didn't have enough buffer space.
-				 */
-				ret = -EINVAL;
-			}
+		ret = -EINTR;
+		if (signal_pending(current))
 			break;
-		}
-		remove_kevent(dev, kevent);
 
-		/*
-		 * Must perform the copy_to_user outside the mutex in order
-		 * to avoid a lock order reversal with mmap_sem.
-		 */
-		mutex_unlock(&dev->ev_mutex);
-
-		if (copy_to_user(buf, &kevent->event, event_size)) {
-			ret = -EFAULT;
+		if (start != buf)
 			break;
-		}
-		buf += event_size;
-		count -= event_size;
-
-		if (kevent->name) {
-			if (copy_to_user(buf, kevent->name, kevent->event.len)){
-				ret = -EFAULT;
-				break;
-			}
-			buf += kevent->event.len;
-			count -= kevent->event.len;
-		}
-
-		free_kevent(kevent);
 
-		mutex_lock(&dev->ev_mutex);
+		schedule();
 	}
-	mutex_unlock(&dev->ev_mutex);
 
+	finish_wait(&dev->wq, &wait);
+	if (start != buf && ret != -EFAULT)
+		ret = buf - start;
 	return ret;
 }
 
-- 
cgit v1.2.3