summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig30
-rw-r--r--fs/befs/btree.c2
-rw-r--r--fs/befs/datastream.c2
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/buffer.c62
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/compat.c32
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ecryptfs/mmap.c5
-rw-r--r--fs/eventpoll.c13
-rw-r--r--fs/exec.c16
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/freevxfs/vxfs_dir.h2
-rw-r--r--fs/freevxfs/vxfs_immed.c2
-rw-r--r--fs/fs-writeback.c31
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/ops_address.c2
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/hostfs/hostfs_user.c8
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jfs/jfs_xtree.c2
-rw-r--r--fs/libfs.c11
-rw-r--r--fs/lockd/host.c26
-rw-r--r--fs/lockd/svc.c39
-rw-r--r--fs/lockd/svc4proc.c20
-rw-r--r--fs/lockd/svclock.c20
-rw-r--r--fs/lockd/svcproc.c22
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/locks.c125
-rw-r--r--fs/mpage.c7
-rw-r--r--fs/ncpfs/mmap.c4
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/client.c6
-rw-r--r--fs/nfs/direct.c7
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/nfs3proc.c7
-rw-r--r--fs/nfs/nfs4proc.c27
-rw-r--r--fs/nfs/nfsroot.c3
-rw-r--r--fs/nfs/pagelist.c19
-rw-r--r--fs/nfs/read.c10
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsd/auth.c10
-rw-r--r--fs/nfsd/auth.h22
-rw-r--r--fs/nfsd/export.c20
-rw-r--r--fs/nfsd/nfs2acl.c7
-rw-r--r--fs/nfsd/nfs3xdr.c21
-rw-r--r--fs/nfsd/nfs4callback.c92
-rw-r--r--fs/nfsd/nfs4idmap.c28
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfs4state.c257
-rw-r--r--fs/nfsd/nfs4xdr.c36
-rw-r--r--fs/nfsd/nfscache.c28
-rw-r--r--fs/nfsd/nfsctl.c124
-rw-r--r--fs/nfsd/nfsfh.c1
-rw-r--r--fs/nfsd/nfssvc.c8
-rw-r--r--fs/nfsd/nfsxdr.c9
-rw-r--r--fs/nfsd/vfs.c51
-rw-r--r--fs/ntfs/aops.c20
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c32
-rw-r--r--fs/ntfs/malloc.h3
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/aops.c6
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/ocfs1_fs_compat.h2
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/proc/array.c28
-rw-r--r--fs/proc/base.c55
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/proc_misc.c136
-rw-r--r--fs/proc/task_mmu.c676
-rw-r--r--fs/readdir.c5
-rw-r--r--fs/reiserfs/bitmap.c6
-rw-r--r--fs/reiserfs/inode.c4
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/smbfs/inode.c7
-rw-r--r--fs/smbfs/request.c2
-rw-r--r--fs/splice.c4
-rw-r--r--fs/timerfd.c207
-rw-r--r--fs/xattr.c45
-rw-r--r--fs/xfs/linux-2.6/kmem.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c2
88 files changed, 1521 insertions, 1073 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 219ec06a8c7e..ea5b35947623 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1152,7 +1152,7 @@ config BEFS_DEBUG
depends on BEFS_FS
help
If you say Y here, you can use the 'debug' mount option to enable
- debugging output from the driver.
+ debugging output from the driver.
config BFS_FS
tristate "BFS file system support (EXPERIMENTAL)"
@@ -1263,7 +1263,7 @@ config JFFS2_FS_XATTR
Extended attributes are name:value pairs associated with inodes by
the kernel or by users (see the attr(5) manual page, or visit
<http://acl.bestbits.at/> for details).
-
+
If unsure, say N.
config JFFS2_FS_POSIX_ACL
@@ -1274,10 +1274,10 @@ config JFFS2_FS_POSIX_ACL
help
Posix Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
-
+
To learn more about Access Control Lists, visit the Posix ACLs for
Linux website <http://acl.bestbits.at/>.
-
+
If you don't know what Access Control Lists are, say N
config JFFS2_FS_SECURITY
@@ -1289,7 +1289,7 @@ config JFFS2_FS_SECURITY
implemented by security modules like SELinux. This option
enables an extended attribute handler for file security
labels in the jffs2 filesystem.
-
+
If you are not using a security module that requires using
extended attributes for file security labels, say N.
@@ -1674,6 +1674,8 @@ config NFSD
select CRYPTO_MD5 if NFSD_V4
select CRYPTO if NFSD_V4
select FS_POSIX_ACL if NFSD_V4
+ select PROC_FS if NFSD_V4
+ select PROC_FS if SUNRPC_GSS
help
If you want your Linux box to act as an NFS *server*, so that other
computers on your local network which support NFS can access certain
@@ -1833,7 +1835,7 @@ config RPCSEC_GSS_SPKM3
If unsure, say N.
config SMB_FS
- tristate "SMB file system support (to mount Windows shares etc.)"
+ tristate "SMB file system support (OBSOLETE, please use CIFS)"
depends on INET
select NLS
help
@@ -1856,8 +1858,8 @@ config SMB_FS
General information about how to connect Linux, Windows machines and
Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
- To compile the SMB support as a module, choose M here: the module will
- be called smbfs. Most people say N, however.
+ To compile the SMB support as a module, choose M here:
+ the module will be called smbfs. Most people say N, however.
config SMB_NLS_DEFAULT
bool "Use a default NLS"
@@ -1889,7 +1891,7 @@ config SMB_NLS_REMOTE
smbmount from samba 2.2.0 or later supports this.
config CIFS
- tristate "CIFS support (advanced network filesystem for Samba, Window and other CIFS compliant servers)"
+ tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
help
@@ -1947,16 +1949,16 @@ config CIFS_WEAK_PW_HASH
LANMAN based servers such as OS/2 and Windows 95, but such
mounts may be less secure than mounts using NTLM or more recent
security mechanisms if you are on a public network. Unless you
- have a need to access old SMB servers (and are on a private
+ have a need to access old SMB servers (and are on a private
network) you probably want to say N. Even if this support
is enabled in the kernel build, LANMAN authentication will not be
used automatically. At runtime LANMAN mounts are disabled but
can be set to required (or optional) either in
/proc/fs/cifs (see fs/cifs/README for more detail) or via an
- option on the mount command. This support is disabled by
+ option on the mount command. This support is disabled by
default in order to reduce the possibility of a downgrade
attack.
-
+
If unsure, say N.
config CIFS_XATTR
@@ -1997,7 +1999,7 @@ config CIFS_DEBUG2
messages in some error paths, slowing performance. This
option can be turned off unless you are debugging
cifs problems. If unsure, say N.
-
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
@@ -2088,7 +2090,7 @@ config CODA_FS_OLD_API
However this new API is not backward compatible with older
clients. If you really need to run the old Coda userspace
cache manager then say Y.
-
+
For most cases you probably want to say N.
config AFS_FS
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index af5bb93276f8..4202db7496cb 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -232,7 +232,7 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds,
* @key: Key string to lookup in btree
* @value: Value stored with @key
*
- * On sucess, returns BEFS_OK and sets *@value to the value stored
+ * On success, returns BEFS_OK and sets *@value to the value stored
* with @key (usually the disk block number of an inode).
*
* On failure, returns BEFS_ERR or BEFS_BT_NOT_FOUND.
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index aacb4da6298a..e3287d0d1a58 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -236,7 +236,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
as in the indirect region code).
When/if blockno is found, if blockno is inside of a block
- run as stored on disk, we offset the start and lenght members
+ run as stored on disk, we offset the start and length members
of the block run, so that blockno is the start and len is
still valid (the run ends in the same place).
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 18ed6dd906c1..4628c42ca892 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -117,7 +117,7 @@ static int padzero(unsigned long elf_bss)
return 0;
}
-/* Let's use some macros to make this stack manipulation a litle clearer */
+/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
diff --git a/fs/buffer.c b/fs/buffer.c
index 456c9ab7705b..826baf4f04bc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1798,7 +1798,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
start = max(from, block_start);
size = min(to, block_end) - start;
- zero_user_page(page, start, size, KM_USER0);
+ zero_user(page, start, size);
set_buffer_uptodate(bh);
}
@@ -1861,19 +1861,10 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
mark_buffer_dirty(bh);
continue;
}
- if (block_end > to || block_start < from) {
- void *kaddr;
-
- kaddr = kmap_atomic(page, KM_USER0);
- if (block_end > to)
- memset(kaddr+to, 0,
- block_end-to);
- if (block_start < from)
- memset(kaddr+block_start,
- 0, from-block_start);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- }
+ if (block_end > to || block_start < from)
+ zero_user_segments(page,
+ to, block_end,
+ block_start, from);
continue;
}
}
@@ -2104,8 +2095,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
SetPageError(page);
}
if (!buffer_mapped(bh)) {
- zero_user_page(page, i * blocksize, blocksize,
- KM_USER0);
+ zero_user(page, i * blocksize, blocksize);
if (!err)
set_buffer_uptodate(bh);
continue;
@@ -2218,7 +2208,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping,
&page, &fsdata);
if (err)
goto out;
- zero_user_page(page, zerofrom, len, KM_USER0);
+ zero_user(page, zerofrom, len);
err = pagecache_write_end(file, mapping, curpos, len, len,
page, fsdata);
if (err < 0)
@@ -2245,7 +2235,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping,
&page, &fsdata);
if (err)
goto out;
- zero_user_page(page, zerofrom, len, KM_USER0);
+ zero_user(page, zerofrom, len);
err = pagecache_write_end(file, mapping, curpos, len, len,
page, fsdata);
if (err < 0)
@@ -2422,7 +2412,6 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
unsigned block_in_page;
unsigned block_start, block_end;
sector_t block_in_file;
- char *kaddr;
int nr_reads = 0;
int ret = 0;
int is_mapped_to_disk = 1;
@@ -2493,13 +2482,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
continue;
}
if (buffer_new(bh) || !buffer_mapped(bh)) {
- kaddr = kmap_atomic(page, KM_USER0);
- if (block_start < from)
- memset(kaddr+block_start, 0, from-block_start);
- if (block_end > to)
- memset(kaddr + to, 0, block_end - to);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_segments(page, block_start, from,
+ to, block_end);
continue;
}
if (buffer_uptodate(bh))
@@ -2636,7 +2620,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
out:
ret = mpage_writepage(page, get_block, wbc);
if (ret == -EAGAIN)
@@ -2709,7 +2693,7 @@ has_buffers:
if (page_has_buffers(page))
goto has_buffers;
}
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
set_page_dirty(page);
err = 0;
@@ -2785,7 +2769,7 @@ int block_truncate_page(struct address_space *mapping,
goto unlock;
}
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
mark_buffer_dirty(bh);
err = 0;
@@ -2831,7 +2815,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
return __block_write_full_page(inode, page, get_block, wbc);
}
@@ -3169,7 +3153,7 @@ static void recalc_bh_state(void)
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
- struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
+ struct buffer_head *ret = kmem_cache_alloc(bh_cachep,
set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
@@ -3257,12 +3241,24 @@ int bh_submit_read(struct buffer_head *bh)
}
EXPORT_SYMBOL(bh_submit_read);
+static void
+init_buffer_head(struct kmem_cache *cachep, void *data)
+{
+ struct buffer_head *bh = data;
+
+ memset(bh, 0, sizeof(*bh));
+ INIT_LIST_HEAD(&bh->b_assoc_buffers);
+}
+
void __init buffer_init(void)
{
int nrpages;
- bh_cachep = KMEM_CACHE(buffer_head,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
+ bh_cachep = kmem_cache_create("buffer_head",
+ sizeof(struct buffer_head), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+ SLAB_MEM_SPREAD),
+ init_buffer_head);
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d9567ba2960b..47f2621001e4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1386,7 +1386,7 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
if (!page)
return -ENOMEM;
- zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
unlock_page(page);
page_cache_release(page);
return rc;
diff --git a/fs/compat.c b/fs/compat.c
index 5216c3fd7517..69baca5ad608 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2206,19 +2206,41 @@ asmlinkage long compat_sys_signalfd(int ufd,
#ifdef CONFIG_TIMERFD
-asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags,
- const struct compat_itimerspec __user *utmr)
+asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
+ const struct compat_itimerspec __user *utmr,
+ struct compat_itimerspec __user *otmr)
{
+ int error;
struct itimerspec t;
struct itimerspec __user *ut;
if (get_compat_itimerspec(&t, utmr))
return -EFAULT;
- ut = compat_alloc_user_space(sizeof(*ut));
- if (copy_to_user(ut, &t, sizeof(t)))
+ ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
+ if (copy_to_user(&ut[0], &t, sizeof(t)))
return -EFAULT;
+ error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
+ if (!error && otmr)
+ error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
+ put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
+
+ return error;
+}
+
+asmlinkage long compat_sys_timerfd_gettime(int ufd,
+ struct compat_itimerspec __user *otmr)
+{
+ int error;
+ struct itimerspec t;
+ struct itimerspec __user *ut;
- return sys_timerfd(ufd, clockid, flags, ut);
+ ut = compat_alloc_user_space(sizeof(struct itimerspec));
+ error = sys_timerfd_gettime(ufd, ut);
+ if (!error)
+ error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
+ put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
+
+ return error;
}
#endif /* CONFIG_TIMERFD */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index acf0da1bd257..9e81addbd6ea 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -878,8 +878,8 @@ do_holes:
page_cache_release(page);
goto out;
}
- zero_user_page(page, block_in_page << blkbits,
- 1 << blkbits, KM_USER0);
+ zero_user(page, block_in_page << blkbits,
+ 1 << blkbits);
dio->block_in_file++;
block_in_page++;
goto next_block;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 32c5711d79a3..0535412d8c64 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -257,8 +257,7 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
if (to > end_byte_in_page)
end_byte_in_page = to;
- zero_user_page(page, end_byte_in_page,
- PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0);
+ zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE);
out:
return 0;
}
@@ -307,7 +306,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
*/
if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
(from != 0)) {
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
}
out:
return rc;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 34f68f3a069a..a415f42d32cf 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -353,7 +353,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
spin_unlock_irqrestore(&psw->lock, flags);
/* Do really wake up now */
- wake_up(wq);
+ wake_up_nested(wq, 1 + wake_nests);
/* Remove the current task from the list */
spin_lock_irqsave(&psw->lock, flags);
@@ -656,8 +656,7 @@ is_linked:
* wait list.
*/
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
- TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
@@ -780,7 +779,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
@@ -854,8 +853,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
- TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
@@ -978,8 +976,7 @@ errxit:
* wait list (delayed after we release the lock).
*/
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
- TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
diff --git a/fs/exec.c b/fs/exec.c
index 282240afe99e..be923e4bc389 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ static int de_thread(struct task_struct *tsk)
*/
read_lock(&tasklist_lock);
spin_lock_irq(lock);
- if (sig->flags & SIGNAL_GROUP_EXIT) {
+ if (signal_group_exit(sig)) {
/*
* Another group action in progress, just
* return so that the signal is processed.
@@ -778,6 +778,7 @@ static int de_thread(struct task_struct *tsk)
if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
task_active_pid_ns(tsk)->child_reaper = tsk;
+ sig->group_exit_task = tsk;
zap_other_threads(tsk);
read_unlock(&tasklist_lock);
@@ -802,7 +803,6 @@ static int de_thread(struct task_struct *tsk)
}
sig->notify_count = count;
- sig->group_exit_task = tsk;
while (atomic_read(&sig->count) > count) {
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(lock);
@@ -871,15 +871,10 @@ static int de_thread(struct task_struct *tsk)
leader->exit_state = EXIT_DEAD;
write_unlock_irq(&tasklist_lock);
- }
+ }
sig->group_exit_task = NULL;
sig->notify_count = 0;
- /*
- * There may be one thread left which is just exiting,
- * but it's safe to stop telling the group to kill themselves.
- */
- sig->flags = 0;
no_thread_group:
exit_itimers(sig);
@@ -947,12 +942,13 @@ static void flush_old_files(struct files_struct * files)
spin_unlock(&files->file_lock);
}
-void get_task_comm(char *buf, struct task_struct *tsk)
+char *get_task_comm(char *buf, struct task_struct *tsk)
{
/* buf must be at least sizeof(tsk->comm) in size */
task_lock(tsk);
strncpy(buf, tsk->comm, sizeof(tsk->comm));
task_unlock(tsk);
+ return buf;
}
void set_task_comm(struct task_struct *tsk, char *buf)
@@ -1548,7 +1544,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
int err = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
- if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+ if (!signal_group_exit(tsk->signal)) {
tsk->signal->group_exit_code = exit_code;
zap_process(tsk);
err = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9b162cd6c16c..077535439288 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1845,7 +1845,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
*/
if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
ext3_should_writeback_data(inode) && PageUptodate(page)) {
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
set_page_dirty(page);
goto unlock;
}
@@ -1898,7 +1898,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
BUFFER_TRACE(bh, "zeroed end of block");
err = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bb717cbb749c..05c4145dd27d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1840,7 +1840,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
*/
if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
ext4_should_writeback_data(inode) && PageUptodate(page)) {
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
set_page_dirty(page);
goto unlock;
}
@@ -1893,7 +1893,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
BUFFER_TRACE(bh, "zeroed end of block");
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h
index 3c96d6e63978..aaf1fb098639 100644
--- a/fs/freevxfs/vxfs_dir.h
+++ b/fs/freevxfs/vxfs_dir.h
@@ -41,7 +41,7 @@
* VxFS directory block header.
*
* This entry is the head of every filesystem block in a directory.
- * It is used for free space managment and additionally includes
+ * It is used for free space management and additionally includes
* a hash for speeding up directory search (lookup).
*
* The hash may be empty and in fact we do not use it all in the
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 24b5a775ff96..8a5959a61ba9 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -54,7 +54,7 @@ const struct inode_operations vxfs_immed_symlink_iops = {
};
/*
- * Adress space operations for immed files and directories.
+ * Address space operations for immed files and directories.
*/
const struct address_space_operations vxfs_immed_aops = {
.readpage = vxfs_immed_readpage,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 300324bd563c..0b3064079fa5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -284,7 +284,17 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
* soon as the queue becomes uncongested.
*/
inode->i_state |= I_DIRTY_PAGES;
- requeue_io(inode);
+ if (wbc->nr_to_write <= 0) {
+ /*
+ * slice used up: queue for next turn
+ */
+ requeue_io(inode);
+ } else {
+ /*
+ * somehow blocked: retry later
+ */
+ redirty_tail(inode);
+ }
} else {
/*
* Otherwise fully redirty the inode so that
@@ -334,9 +344,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
WARN_ON(inode->i_state & I_WILL_FREE);
if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
- struct address_space *mapping = inode->i_mapping;
- int ret;
-
/*
* We're skipping this inode because it's locked, and we're not
* doing writeback-for-data-integrity. Move it to s_more_io so
@@ -345,15 +352,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* completed a full scan of s_io.
*/
requeue_io(inode);
-
- /*
- * Even if we don't actually write the inode itself here,
- * we can at least start some of the data writeout..
- */
- spin_unlock(&inode_lock);
- ret = do_writepages(mapping, wbc);
- spin_lock(&inode_lock);
- return ret;
+ return 0;
}
/*
@@ -479,8 +478,12 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
iput(inode);
cond_resched();
spin_lock(&inode_lock);
- if (wbc->nr_to_write <= 0)
+ if (wbc->nr_to_write <= 0) {
+ wbc->more_io = 1;
break;
+ }
+ if (!list_empty(&sb->s_more_io))
+ wbc->more_io = 1;
}
return; /* Leave any unwritten inodes on s_io */
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e4effc47abfc..e9456ebd3bb6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -932,7 +932,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
if (!gfs2_is_writeback(ip))
gfs2_trans_add_bh(ip->i_gl, bh, 0);
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
unlock:
unlock_page(page);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 38dbe99a30ed..ac772b6d9dbb 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -446,7 +446,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
* so we need to supply one here. It doesn't happen often.
*/
if (unlikely(page->index)) {
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
return 0;
}
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index b249e294a95b..6fb07d67ca8a 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -450,7 +450,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
jd->jd_jid);
- /* Aquire the journal lock so we can do recovery */
+ /* Acquire the journal lock so we can do recovery */
error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
LM_ST_EXCLUSIVE,
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 35c1a9f33f47..53fd0a67c11a 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -285,17 +285,17 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
return err;
times[0].tv_sec = atime_ts.tv_sec;
- times[0].tv_usec = atime_ts.tv_nsec * 1000;
+ times[0].tv_usec = atime_ts.tv_nsec / 1000;
times[1].tv_sec = mtime_ts.tv_sec;
- times[1].tv_usec = mtime_ts.tv_nsec * 1000;
+ times[1].tv_usec = mtime_ts.tv_nsec / 1000;
if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
times[0].tv_sec = attrs->ia_atime.tv_sec;
- times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000;
+ times[0].tv_usec = attrs->ia_atime.tv_nsec / 1000;
}
if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) {
times[1].tv_sec = attrs->ia_mtime.tv_sec;
- times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000;
+ times[1].tv_usec = attrs->ia_mtime.tv_nsec / 1000;
}
if (fd >= 0) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 09ee07f02663..3b3cc28cdefc 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -768,7 +768,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
case Opt_mode:
if (match_octal(&args[0], &option))
goto bad_val;
- pconfig->mode = option & 0777U;
+ pconfig->mode = option & 01777U;
break;
case Opt_size: {
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 2eae5d2dbebe..6c1ba3566f58 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -741,7 +741,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
* are not obsolete.
*
* Of course, this optimization only makes sense in case
- * of NAND flashes (or other flashes whith
+ * of NAND flashes (or other flashes with
* !jffs2_can_mark_obsolete()), since on NOR flashes
* nodes are marked obsolete physically.
*
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 1543906a2e0d..a000aaa75136 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3965,7 +3965,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
* xtTruncate_pmap()
*
* function:
- * Perform truncate to zero lenghth for deleted file, leaving the
+ * Perform truncate to zero length for deleted file, leaving the
* the xtree and working map untouched. This allows the file to
* be accessed via open file handles, while the delete of the file
* is committed to disk.
diff --git a/fs/libfs.c b/fs/libfs.c
index 6e68b700958d..5523bde96387 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -341,13 +341,10 @@ int simple_prepare_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
if (!PageUptodate(page)) {
- if (to - from != PAGE_CACHE_SIZE) {
- void *kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, from);
- memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- }
+ if (to - from != PAGE_CACHE_SIZE)
+ zero_user_segments(page,
+ 0, from,
+ to, PAGE_CACHE_SIZE);
}
return 0;
}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 572601e98dcd..ca6b16fc3101 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -34,10 +34,10 @@ static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
- const char *, int, int);
+ const char *, unsigned int, int);
static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
const char *hostname,
- int hostname_len);
+ unsigned int hostname_len);
/*
* Common host lookup routine for server & client
@@ -45,7 +45,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
static struct nlm_host *
nlm_lookup_host(int server, const struct sockaddr_in *sin,
int proto, int version, const char *hostname,
- int hostname_len, const struct sockaddr_in *ssin)
+ unsigned int hostname_len,
+ const struct sockaddr_in *ssin)
{
struct hlist_head *chain;
struct hlist_node *pos;
@@ -176,7 +177,7 @@ nlm_destroy_host(struct nlm_host *host)
*/
struct nlm_host *
nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
- const char *hostname, int hostname_len)
+ const char *hostname, unsigned int hostname_len)
{
struct sockaddr_in ssin = {0};
@@ -189,7 +190,7 @@ nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
*/
struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp,
- const char *hostname, int hostname_len)
+ const char *hostname, unsigned int hostname_len)
{
struct sockaddr_in ssin = {0};
@@ -307,7 +308,8 @@ void nlm_release_host(struct nlm_host *host)
* Release all resources held by that peer.
*/
void nlm_host_rebooted(const struct sockaddr_in *sin,
- const char *hostname, int hostname_len,
+ const char *hostname,
+ unsigned int hostname_len,
u32 new_state)
{
struct hlist_head *chain;
@@ -377,8 +379,13 @@ nlm_shutdown_hosts(void)
/* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts...\n");
for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash)
+ hlist_for_each_entry(host, pos, chain, h_hash) {
host->h_expires = jiffies - 1;
+ if (host->h_rpcclnt) {
+ rpc_shutdown_client(host->h_rpcclnt);
+ host->h_rpcclnt = NULL;
+ }
+ }
}
/* Then, perform a garbage collection pass */
@@ -449,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex);
static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin,
- const char *hostname, int hostname_len,
+ const char *hostname, unsigned int hostname_len,
int create)
{
struct nsm_handle *nsm = NULL;
@@ -503,7 +510,8 @@ out:
}
static struct nsm_handle *
-nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
+nsm_find(const struct sockaddr_in *sin, const char *hostname,
+ unsigned int hostname_len)
{
return __nsm_find(sin, hostname, hostname_len, 1);
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 82e2192a0d5c..08226464e563 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -219,19 +219,6 @@ lockd(struct svc_rqst *rqstp)
module_put_and_exit(0);
}
-
-static int find_socket(struct svc_serv *serv, int proto)
-{
- struct svc_sock *svsk;
- int found = 0;
- list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
- if (svsk->sk_sk->sk_protocol == proto) {
- found = 1;
- break;
- }
- return found;
-}
-
/*
* Make any sockets that are needed but not present.
* If nlm_udpport or nlm_tcpport were set as module
@@ -240,17 +227,25 @@ static int find_socket(struct svc_serv *serv, int proto)
static int make_socks(struct svc_serv *serv, int proto)
{
static int warned;
+ struct svc_xprt *xprt;
int err = 0;
- if (proto == IPPROTO_UDP || nlm_udpport)
- if (!find_socket(serv, IPPROTO_UDP))
- err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport,
- SVC_SOCK_DEFAULTS);
- if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
- if (!find_socket(serv, IPPROTO_TCP))
- err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport,
- SVC_SOCK_DEFAULTS);
-
+ if (proto == IPPROTO_UDP || nlm_udpport) {
+ xprt = svc_find_xprt(serv, "udp", 0, 0);
+ if (!xprt)
+ err = svc_create_xprt(serv, "udp", nlm_udpport,
+ SVC_SOCK_DEFAULTS);
+ else
+ svc_xprt_put(xprt);
+ }
+ if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) {
+ xprt = svc_find_xprt(serv, "tcp", 0, 0);
+ if (!xprt)
+ err = svc_create_xprt(serv, "tcp", nlm_tcpport,
+ SVC_SOCK_DEFAULTS);
+ else
+ svc_xprt_put(xprt);
+ }
if (err >= 0) {
warned = 0;
err = 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bf27b6c6cb6b..385437e3387d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -84,6 +84,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
+ int rc = rpc_success;
dprintk("lockd: TEST4 called\n");
resp->cookie = argp->cookie;
@@ -91,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept test requests during grace period */
if (nlmsvc_grace_period) {
resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
+ return rc;
}
/* Obtain client and file */
@@ -101,12 +102,13 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
- return rpc_drop_reply;
+ rc = rpc_drop_reply;
+ else
+ dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
- dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
nlm_release_host(host);
nlm_release_file(file);
- return rpc_success;
+ return rc;
}
static __be32
@@ -115,6 +117,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
+ int rc = rpc_success;
dprintk("lockd: LOCK called\n");
@@ -123,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept new lock requests during grace period */
if (nlmsvc_grace_period && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
+ return rc;
}
/* Obtain client and file */
@@ -146,12 +149,13 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
argp->block, &argp->cookie);
if (resp->status == nlm_drop_reply)
- return rpc_drop_reply;
+ rc = rpc_drop_reply;
+ else
+ dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
- dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlm_release_host(host);
nlm_release_file(file);
- return rpc_success;
+ return rc;
}
static __be32
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d120ec39bcb0..2f4d8fa66689 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -501,25 +501,29 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
block, block->b_flags, block->b_fl);
if (block->b_flags & B_TIMED_OUT) {
nlmsvc_unlink_block(block);
- return nlm_lck_denied;
+ ret = nlm_lck_denied;
+ goto out;
}
if (block->b_flags & B_GOT_CALLBACK) {
+ nlmsvc_unlink_block(block);
if (block->b_fl != NULL
&& block->b_fl->fl_type != F_UNLCK) {
lock->fl = *block->b_fl;
goto conf_lock;
- }
- else {
- nlmsvc_unlink_block(block);
- return nlm_granted;
+ } else {
+ ret = nlm_granted;
+ goto out;
}
}
- return nlm_drop_reply;
+ ret = nlm_drop_reply;
+ goto out;
}
error = vfs_test_lock(file->f_file, &lock->fl);
- if (error == -EINPROGRESS)
- return nlmsvc_defer_lock_rqst(rqstp, block);
+ if (error == -EINPROGRESS) {
+ ret = nlmsvc_defer_lock_rqst(rqstp, block);
+ goto out;
+ }
if (error) {
ret = nlm_lck_denied_nolocks;
goto out;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 9cd5c8b37593..88379cc6e0b1 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -113,6 +113,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
+ int rc = rpc_success;
dprintk("lockd: TEST called\n");
resp->cookie = argp->cookie;
@@ -120,7 +121,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept test requests during grace period */
if (nlmsvc_grace_period) {
resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
+ return rc;
}
/* Obtain client and file */
@@ -130,13 +131,14 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply)
- return rpc_drop_reply;
+ rc = rpc_drop_reply;
+ else
+ dprintk("lockd: TEST status %d vers %d\n",
+ ntohl(resp->status), rqstp->rq_vers);
- dprintk("lockd: TEST status %d vers %d\n",
- ntohl(resp->status), rqstp->rq_vers);
nlm_release_host(host);
nlm_release_file(file);
- return rpc_success;
+ return rc;
}
static __be32
@@ -145,6 +147,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
+ int rc = rpc_success;
dprintk("lockd: LOCK called\n");
@@ -153,7 +156,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept new lock requests during grace period */
if (nlmsvc_grace_period && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
+ return rc;
}
/* Obtain client and file */
@@ -176,12 +179,13 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
argp->block, &argp->cookie));
if (resp->status == nlm_drop_reply)
- return rpc_drop_reply;
+ rc = rpc_drop_reply;
+ else
+ dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
- dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlm_release_host(host);
nlm_release_file(file);
- return rpc_success;
+ return rc;
}
static __be32
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 84ebba33b98d..dbbefbcd6712 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -87,7 +87,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
unsigned int hash;
__be32 nfserr;
- nlm_debug_print_fh("nlm_file_lookup", f);
+ nlm_debug_print_fh("nlm_lookup_file", f);
hash = file_hash(f);
diff --git a/fs/locks.c b/fs/locks.c
index 8b8388eca05e..49354b9c7dc1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -125,6 +125,7 @@
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
@@ -185,6 +186,7 @@ void locks_init_lock(struct file_lock *fl)
fl->fl_fasync = NULL;
fl->fl_owner = NULL;
fl->fl_pid = 0;
+ fl->fl_nspid = NULL;
fl->fl_file = NULL;
fl->fl_flags = 0;
fl->fl_type = 0;
@@ -553,6 +555,8 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
{
list_add(&fl->fl_link, &file_lock_list);
+ fl->fl_nspid = get_pid(task_tgid(current));
+
/* insert into file's list */
fl->fl_next = *pos;
*pos = fl;
@@ -584,6 +588,11 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
if (fl->fl_ops && fl->fl_ops->fl_remove)
fl->fl_ops->fl_remove(fl);
+ if (fl->fl_nspid) {
+ put_pid(fl->fl_nspid);
+ fl->fl_nspid = NULL;
+ }
+
locks_wake_up_blocks(fl);
locks_free_lock(fl);
}
@@ -634,33 +643,6 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
return (locks_conflict(caller_fl, sys_fl));
}
-static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout)
-{
- int result = 0;
- DECLARE_WAITQUEUE(wait, current);
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(fl_wait, &wait);
- if (timeout == 0)
- schedule();
- else
- result = schedule_timeout(timeout);
- if (signal_pending(current))
- result = -ERESTARTSYS;
- remove_wait_queue(fl_wait, &wait);
- __set_current_state(TASK_RUNNING);
- return result;
-}
-
-static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time)
-{
- int result;
- locks_insert_block(blocker, waiter);
- result = interruptible_sleep_on_locked(&waiter->fl_wait, time);
- __locks_delete_block(waiter);
- return result;
-}
-
void
posix_test_lock(struct file *filp, struct file_lock *fl)
{
@@ -673,55 +655,67 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
if (posix_locks_conflict(fl, cfl))
break;
}
- if (cfl)
+ if (cfl) {
__locks_copy_lock(fl, cfl);
- else
+ if (cfl->fl_nspid)
+ fl->fl_pid = pid_nr_ns(cfl->fl_nspid,
+ task_active_pid_ns(current));
+ } else
fl->fl_type = F_UNLCK;
unlock_kernel();
return;
}
-
EXPORT_SYMBOL(posix_test_lock);
-/* This function tests for deadlock condition before putting a process to
- * sleep. The detection scheme is no longer recursive. Recursive was neat,
- * but dangerous - we risked stack corruption if the lock data was bad, or
- * if the recursion was too deep for any other reason.
+/*
+ * Deadlock detection:
+ *
+ * We attempt to detect deadlocks that are due purely to posix file
+ * locks.
*
- * We rely on the fact that a task can only be on one lock's wait queue
- * at a time. When we find blocked_task on a wait queue we can re-search
- * with blocked_task equal to that queue's owner, until either blocked_task
- * isn't found, or blocked_task is found on a queue owned by my_task.
+ * We assume that a task can be waiting for at most one lock at a time.
+ * So for any acquired lock, the process holding that lock may be
+ * waiting on at most one other lock. That lock in turns may be held by
+ * someone waiting for at most one other lock. Given a requested lock
+ * caller_fl which is about to wait for a conflicting lock block_fl, we
+ * follow this chain of waiters to ensure we are not about to create a
+ * cycle.
*
- * Note: the above assumption may not be true when handling lock requests
- * from a broken NFS client. But broken NFS clients have a lot more to
- * worry about than proper deadlock detection anyway... --okir
+ * Since we do this before we ever put a process to sleep on a lock, we
+ * are ensured that there is never a cycle; that is what guarantees that
+ * the while() loop in posix_locks_deadlock() eventually completes.
*
- * However, the failure of this assumption (also possible in the case of
- * multiple tasks sharing the same open file table) also means there's no
- * guarantee that the loop below will terminate. As a hack, we give up
- * after a few iterations.
+ * Note: the above assumption may not be true when handling lock
+ * requests from a broken NFS client. It may also fail in the presence
+ * of tasks (such as posix threads) sharing the same open file table.
+ *
+ * To handle those cases, we just bail out after a few iterations.
*/
#define MAX_DEADLK_ITERATIONS 10
+/* Find a lock that the owner of the given block_fl is blocking on. */
+static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
+{
+ struct file_lock *fl;
+
+ list_for_each_entry(fl, &blocked_list, fl_link) {
+ if (posix_same_owner(fl, block_fl))
+ return fl->fl_next;
+ }
+ return NULL;
+}
+
static int posix_locks_deadlock(struct file_lock *caller_fl,
struct file_lock *block_fl)
{
- struct file_lock *fl;
int i = 0;
-next_task:
- if (posix_same_owner(caller_fl, block_fl))
- return 1;
- list_for_each_entry(fl, &blocked_list, fl_link) {
- if (posix_same_owner(fl, block_fl)) {
- if (i++ > MAX_DEADLK_ITERATIONS)
- return 0;
- fl = fl->fl_next;
- block_fl = fl;
- goto next_task;
- }
+ while ((block_fl = what_owner_is_waiting_for(block_fl))) {
+ if (i++ > MAX_DEADLK_ITERATIONS)
+ return 0;
+ if (posix_same_owner(caller_fl, block_fl))
+ return 1;
}
return 0;
}
@@ -1256,7 +1250,10 @@ restart:
if (break_time == 0)
break_time++;
}
- error = locks_block_on_timeout(flock, new_fl, break_time);
+ locks_insert_block(flock, new_fl);
+ error = wait_event_interruptible_timeout(new_fl->fl_wait,
+ !new_fl->fl_next, break_time);
+ __locks_delete_block(new_fl);
if (error >= 0) {
if (error == 0)
time_out_leases(inode);
@@ -2084,6 +2081,12 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
int id, char *pfx)
{
struct inode *inode = NULL;
+ unsigned int fl_pid;
+
+ if (fl->fl_nspid)
+ fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current));
+ else
+ fl_pid = fl->fl_pid;
if (fl->fl_file != NULL)
inode = fl->fl_file->f_path.dentry->d_inode;
@@ -2124,16 +2127,16 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
}
if (inode) {
#ifdef WE_CAN_BREAK_LSLK_NOW
- seq_printf(f, "%d %s:%ld ", fl->fl_pid,
+ seq_printf(f, "%d %s:%ld ", fl_pid,
inode->i_sb->s_id, inode->i_ino);
#else
/* userspace relies on this representation of dev_t ;-( */
- seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid,
+ seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
#endif
} else {
- seq_printf(f, "%d <none>:0 ", fl->fl_pid);
+ seq_printf(f, "%d <none>:0 ", fl_pid);
}
if (IS_POSIX(fl)) {
if (fl->fl_end == OFFSET_MAX)
diff --git a/fs/mpage.c b/fs/mpage.c
index d54f8f897224..5df564366f36 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -276,9 +276,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
}
if (first_hole != blocks_per_page) {
- zero_user_page(page, first_hole << blkbits,
- PAGE_CACHE_SIZE - (first_hole << blkbits),
- KM_USER0);
+ zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
if (first_hole == 0) {
SetPageUptodate(page);
unlock_page(page);
@@ -571,8 +569,7 @@ page_is_mapped:
if (page->index > end_index || !offset)
goto confused;
- zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
- KM_USER0);
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
}
/*
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index a94473d3072c..5d8dcb9ee326 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -50,10 +50,6 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
pos = vmf->pgoff << PAGE_SHIFT;
count = PAGE_SIZE;
- if ((unsigned long)vmf->virtual_address + PAGE_SIZE > area->vm_end) {
- WARN_ON(1); /* shouldn't happen? */
- count = area->vm_end - (unsigned long)vmf->virtual_address;
- }
/* what we can read in one go */
bufsize = NCP_SERVER(inode)->buffer_size;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 9b6bbf1b9787..bd185a572a23 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -119,8 +119,8 @@ int nfs_callback_up(void)
if (!serv)
goto out_err;
- ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport,
- SVC_SOCK_ANONYMOUS);
+ ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport,
+ SVC_SOCK_ANONYMOUS);
if (ret <= 0)
goto out_destroy;
nfs_callback_tcpport = ret;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 685c43f810c1..c5c0175898f6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -386,7 +386,7 @@ found_client:
if (new)
nfs_free_client(new);
- error = wait_event_interruptible(nfs_client_active_wq,
+ error = wait_event_killable(nfs_client_active_wq,
clp->cl_cons_state != NFS_CS_INITING);
if (error < 0) {
nfs_put_client(clp);
@@ -589,10 +589,6 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
- server->client->cl_intr = 0;
- if (server->flags & NFS4_MOUNT_INTR)
- server->client->cl_intr = 1;
-
return 0;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f8e165c7d5a6..16844f98f50e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -188,17 +188,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
{
ssize_t result = -EIOCBQUEUED;
- struct rpc_clnt *clnt;
- sigset_t oldset;
/* Async requests don't wait here */
if (dreq->iocb)
goto out;
- clnt = NFS_CLIENT(dreq->inode);
- rpc_clnt_sigmask(clnt, &oldset);
- result = wait_for_completion_interruptible(&dreq->completion);
- rpc_clnt_sigunmask(clnt, &oldset);
+ result = wait_for_completion_killable(&dreq->completion);
if (!result)
result = dreq->error;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3f332e54e760..966a8850aa30 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -433,15 +433,11 @@ static int nfs_wait_schedule(void *word)
*/
static int nfs_wait_on_inode(struct inode *inode)
{
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_inode *nfsi = NFS_I(inode);
- sigset_t oldmask;
int error;
- rpc_clnt_sigmask(clnt, &oldmask);
error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
- nfs_wait_schedule, TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldmask);
+ nfs_wait_schedule, TASK_KILLABLE);
return error;
}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 8afd9f7e7a97..49c7cd0502cc 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -56,7 +56,7 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
.program = &mnt_program,
.version = version,
.authflavor = RPC_AUTH_UNIX,
- .flags = RPC_CLNT_CREATE_INTR,
+ .flags = 0,
};
struct rpc_clnt *mnt_clnt;
int status;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index b353c1a05bfd..549dbce714a4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -27,17 +27,14 @@
static int
nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
- sigset_t oldset;
int res;
- rpc_clnt_sigmask(clnt, &oldset);
do {
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
+ schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
- } while (!signalled());
- rpc_clnt_sigunmask(clnt, &oldset);
+ } while (!fatal_signal_pending(current));
return res;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5c189bd57eb2..027e1095256e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -316,12 +316,9 @@ static void nfs4_opendata_put(struct nfs4_opendata *p)
static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
{
- sigset_t oldset;
int ret;
- rpc_clnt_sigmask(task->tk_client, &oldset);
ret = rpc_wait_for_completion_task(task);
- rpc_clnt_sigunmask(task->tk_client, &oldset);
return ret;
}
@@ -2785,9 +2782,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
return 0;
}
-static int nfs4_wait_bit_interruptible(void *word)
+static int nfs4_wait_bit_killable(void *word)
{
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
return -ERESTARTSYS;
schedule();
return 0;
@@ -2795,18 +2792,14 @@ static int nfs4_wait_bit_interruptible(void *word)
static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
{
- sigset_t oldset;
int res;
might_sleep();
rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
- rpc_clnt_sigmask(clnt, &oldset);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
- nfs4_wait_bit_interruptible,
- TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldset);
+ nfs4_wait_bit_killable, TASK_KILLABLE);
rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
return res;
@@ -2814,7 +2807,6 @@ static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
{
- sigset_t oldset;
int res = 0;
might_sleep();
@@ -2823,14 +2815,9 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- rpc_clnt_sigmask(clnt, &oldset);
- if (clnt->cl_intr) {
- schedule_timeout_interruptible(*timeout);
- if (signalled())
- res = -ERESTARTSYS;
- } else
- schedule_timeout_uninterruptible(*timeout);
- rpc_clnt_sigunmask(clnt, &oldset);
+ schedule_timeout_killable(*timeout);
+ if (fatal_signal_pending(current))
+ res = -ERESTARTSYS;
*timeout <<= 1;
return res;
}
@@ -3069,7 +3056,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- schedule_timeout_interruptible(timeout);
+ schedule_timeout_killable(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 4b0334590ee5..531379d36823 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -228,10 +228,7 @@ static int __init root_nfs_parse(char *name, char *buf)
nfs_data.flags &= ~NFS_MOUNT_SOFT;
break;
case Opt_intr:
- nfs_data.flags |= NFS_MOUNT_INTR;
- break;
case Opt_nointr:
- nfs_data.flags &= ~NFS_MOUNT_INTR;
break;
case Opt_posix:
nfs_data.flags |= NFS_MOUNT_POSIX;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 3b3dbb94393d..7f079209d70a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -58,7 +58,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
struct page *page,
unsigned int offset, unsigned int count)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_page *req;
for (;;) {
@@ -67,7 +66,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
if (req != NULL)
break;
- if (signalled() && (server->flags & NFS_MOUNT_INTR))
+ if (fatal_signal_pending(current))
return ERR_PTR(-ERESTARTSYS);
yield();
}
@@ -177,11 +176,11 @@ void nfs_release_request(struct nfs_page *req)
kref_put(&req->wb_kref, nfs_free_request);
}
-static int nfs_wait_bit_interruptible(void *word)
+static int nfs_wait_bit_killable(void *word)
{
int ret = 0;
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
ret = -ERESTARTSYS;
else
schedule();
@@ -192,26 +191,18 @@ static int nfs_wait_bit_interruptible(void *word)
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
*
- * Interruptible by signals only if mounted with intr flag.
+ * Interruptible by fatal signals only.
* The user is responsible for holding a count on the request.
*/
int
nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
- sigset_t oldmask;
int ret = 0;
if (!test_bit(PG_BUSY, &req->wb_flags))
goto out;
- /*
- * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
- * are not interrupted if intr flag is not set
- */
- rpc_clnt_sigmask(clnt, &oldmask);
ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldmask);
+ nfs_wait_bit_killable, TASK_KILLABLE);
out:
return ret;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8fd6dfbe1bc3..3d7d9631e125 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -79,7 +79,7 @@ void nfs_readdata_release(void *data)
static
int nfs_return_empty_page(struct page *page)
{
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
unlock_page(page);
return 0;
@@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
pglen = PAGE_CACHE_SIZE - base;
for (;;) {
if (remainder <= pglen) {
- zero_user_page(*pages, base, remainder, KM_USER0);
+ zero_user(*pages, base, remainder);
break;
}
- zero_user_page(*pages, base, pglen, KM_USER0);
+ zero_user(*pages, base, pglen);
pages++;
remainder -= pglen;
pglen = PAGE_CACHE_SIZE;
@@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
return PTR_ERR(new);
}
if (len < PAGE_CACHE_SIZE)
- zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+ zero_user_segment(page, len, PAGE_CACHE_SIZE);
nfs_list_add_request(new, &one_request);
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
@@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page)
goto out_error;
if (len < PAGE_CACHE_SIZE)
- zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+ zero_user_segment(page, len, PAGE_CACHE_SIZE);
nfs_pageio_add_request(desc->pgio, new);
return 0;
out_error:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 22c49c02897d..7f4505f6ac6f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -448,7 +448,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
- { NFS_MOUNT_INTR, ",intr", ",nointr" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
{ NFS_MOUNT_NONLM, ",nolock", "" },
@@ -708,10 +707,7 @@ static int nfs_parse_mount_options(char *raw,
mnt->flags &= ~NFS_MOUNT_SOFT;
break;
case Opt_intr:
- mnt->flags |= NFS_MOUNT_INTR;
- break;
case Opt_nointr:
- mnt->flags &= ~NFS_MOUNT_INTR;
break;
case Opt_posix:
mnt->flags |= NFS_MOUNT_POSIX;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5ac5b27b639a..b144b1957dd9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -488,7 +488,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
/*
* Wait for a request to complete.
*
- * Interruptible by signals only if mounted with intr flag.
+ * Interruptible by fatal signals only.
*/
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
@@ -665,9 +665,7 @@ zero_page:
* then we need to zero any uninitalised data. */
if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
&& !PageUptodate(req->wb_page))
- zero_user_page(req->wb_page, req->wb_bytes,
- PAGE_CACHE_SIZE - req->wb_bytes,
- KM_USER0);
+ zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE);
return req;
}
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 21928056e35e..d13403e33622 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -11,8 +11,6 @@
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/export.h>
-#define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
-
int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
{
struct exp_flavor_info *f;
@@ -69,10 +67,12 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
ret = set_current_groups(cred.cr_group_info);
put_group_info(cred.cr_group_info);
if ((cred.cr_uid)) {
- cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
+ current->cap_effective =
+ cap_drop_nfsd_set(current->cap_effective);
} else {
- cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
- current->cap_permitted);
+ current->cap_effective =
+ cap_raise_nfsd_set(current->cap_effective,
+ current->cap_permitted);
}
return ret;
}
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
new file mode 100644
index 000000000000..78b3c0e93822
--- /dev/null
+++ b/fs/nfsd/auth.h
@@ -0,0 +1,22 @@
+/*
+ * nfsd-specific authentication stuff.
+ * uid/gid mapping not yet implemented.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_AUTH_H
+#define LINUX_NFSD_AUTH_H
+
+#define nfsd_luid(rq, uid) ((u32)(uid))
+#define nfsd_lgid(rq, gid) ((u32)(gid))
+#define nfsd_ruid(rq, uid) ((u32)(uid))
+#define nfsd_rgid(rq, gid) ((u32)(gid))
+
+/*
+ * Set the current process's fsuid/fsgid etc to those of the NFS
+ * client user
+ */
+int nfsd_setuser(struct svc_rqst *, struct svc_export *);
+
+#endif /* LINUX_NFSD_AUTH_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 66d0aeb32a47..79b4bf812960 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1357,8 +1357,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
- if (PTR_ERR(exp) == -ENOENT)
- return nfserr_perm;
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
@@ -1637,13 +1635,19 @@ exp_verify_string(char *cp, int max)
/*
* Initialize the exports module.
*/
-void
+int
nfsd_export_init(void)
{
+ int rv;
dprintk("nfsd: initializing export module.\n");
- cache_register(&svc_export_cache);
- cache_register(&svc_expkey_cache);
+ rv = cache_register(&svc_export_cache);
+ if (rv)
+ return rv;
+ rv = cache_register(&svc_expkey_cache);
+ if (rv)
+ cache_unregister(&svc_export_cache);
+ return rv;
}
@@ -1670,10 +1674,8 @@ nfsd_export_shutdown(void)
exp_writelock();
- if (cache_unregister(&svc_expkey_cache))
- printk(KERN_ERR "nfsd: failed to unregister expkey cache\n");
- if (cache_unregister(&svc_export_cache))
- printk(KERN_ERR "nfsd: failed to unregister export cache\n");
+ cache_unregister(&svc_expkey_cache);
+ cache_unregister(&svc_export_cache);
svcauth_unix_purge();
exp_writeunlock();
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 0e5fa11e6b44..1c3b7654e966 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -221,12 +221,17 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclres *resp)
{
struct dentry *dentry = resp->fh.fh_dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode;
struct kvec *head = rqstp->rq_res.head;
unsigned int base;
int n;
int w;
+ /*
+ * Since this is version 2, the check for nfserr in
+ * nfsd_dispatch actually ensures the following cannot happen.
+ * However, it seems fragile to depend on that.
+ */
if (dentry == NULL || dentry->d_inode == NULL)
return 0;
inode = dentry->d_inode;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f917fd25858a..d7647f70e02b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -21,6 +21,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr3.h>
+#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
@@ -88,10 +89,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
* no slashes or null bytes.
*/
static __be32 *
-decode_filename(__be32 *p, char **namp, int *lenp)
+decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{
char *name;
- int i;
+ unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) {
@@ -452,8 +453,7 @@ int
nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_symlinkargs *args)
{
- unsigned int len;
- int avail;
+ unsigned int len, avail;
char *old, *new;
struct kvec *vec;
@@ -486,7 +486,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
/* now copy next page if there is one */
if (len && !avail && rqstp->rq_arg.page_len) {
avail = rqstp->rq_arg.page_len;
- if (avail > PAGE_SIZE) avail = PAGE_SIZE;
+ if (avail > PAGE_SIZE)
+ avail = PAGE_SIZE;
old = page_address(rqstp->rq_arg.pages[0]);
}
while (len && avail && *old) {
@@ -816,11 +817,11 @@ static __be32 *
encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
struct svc_fh *fhp)
{
- p = encode_post_op_attr(cd->rqstp, p, fhp);
- *p++ = xdr_one; /* yes, a file handle follows */
- p = encode_fh(p, fhp);
- fh_put(fhp);
- return p;
+ p = encode_post_op_attr(cd->rqstp, p, fhp);
+ *p++ = xdr_one; /* yes, a file handle follows */
+ p = encode_fh(p, fhp);
+ fh_put(fhp);
+ return p;
}
static int
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 9d536a8cb379..aae2b29ae2c9 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -350,30 +350,6 @@ static struct rpc_version * nfs_cb_version[] = {
static int do_probe_callback(void *data)
{
struct nfs4_client *clp = data;
- struct nfs4_callback *cb = &clp->cl_callback;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
- .rpc_argp = clp,
- };
- int status;
-
- status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT);
-
- if (status) {
- rpc_shutdown_client(cb->cb_client);
- cb->cb_client = NULL;
- } else
- atomic_set(&cb->cb_set, 1);
- put_nfs4_client(clp);
- return 0;
-}
-
-/*
- * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
- */
-void
-nfsd4_probe_callback(struct nfs4_client *clp)
-{
struct sockaddr_in addr;
struct nfs4_callback *cb = &clp->cl_callback;
struct rpc_timeout timeparms = {
@@ -390,13 +366,15 @@ nfsd4_probe_callback(struct nfs4_client *clp)
.timeout = &timeparms,
.program = program,
.version = nfs_cb_version[1]->number,
- .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
+ .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
.flags = (RPC_CLNT_CREATE_NOPING),
};
- struct task_struct *t;
-
- if (atomic_read(&cb->cb_set))
- return;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+ .rpc_argp = clp,
+ };
+ struct rpc_clnt *client;
+ int status;
/* Initialize address */
memset(&addr, 0, sizeof(addr));
@@ -416,29 +394,50 @@ nfsd4_probe_callback(struct nfs4_client *clp)
program->stats->program = program;
/* Create RPC client */
- cb->cb_client = rpc_create(&args);
- if (IS_ERR(cb->cb_client)) {
+ client = rpc_create(&args);
+ if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client\n");
+ status = PTR_ERR(client);
goto out_err;
}
+ status = rpc_call_sync(client, &msg, RPC_TASK_SOFT);
+
+ if (status)
+ goto out_release_client;
+
+ cb->cb_client = client;
+ atomic_set(&cb->cb_set, 1);
+ put_nfs4_client(clp);
+ return 0;
+out_release_client:
+ rpc_shutdown_client(client);
+out_err:
+ put_nfs4_client(clp);
+ dprintk("NFSD: warning: no callback path to client %.*s\n",
+ (int)clp->cl_name.len, clp->cl_name.data);
+ return status;
+}
+
+/*
+ * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+ */
+void
+nfsd4_probe_callback(struct nfs4_client *clp)
+{
+ struct task_struct *t;
+
+ BUG_ON(atomic_read(&clp->cl_callback.cb_set));
+
/* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count);
t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe");
if (IS_ERR(t))
- goto out_release_clp;
+ atomic_dec(&clp->cl_count);
return;
-
-out_release_clp:
- atomic_dec(&clp->cl_count);
- rpc_shutdown_client(cb->cb_client);
-out_err:
- cb->cb_client = NULL;
- dprintk("NFSD: warning: no callback path to client %.*s\n",
- (int)clp->cl_name.len, clp->cl_name.data);
}
/*
@@ -458,9 +457,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
int retries = 1;
int status = 0;
- if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
- return;
-
cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
cbr->cbr_dp = dp;
@@ -469,6 +465,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
switch (status) {
case -EIO:
/* Network partition? */
+ atomic_set(&clp->cl_callback.cb_set, 0);
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall
@@ -481,11 +478,10 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
}
out_put_cred:
- if (status == -EIO)
- atomic_set(&clp->cl_callback.cb_set, 0);
- /* Success or failure, now we're either waiting for lease expiration
- * or deleg_return. */
- dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count));
+ /*
+ * Success or failure, now we're either waiting for lease expiration
+ * or deleg_return.
+ */
put_nfs4_client(clp);
nfs4_put_delegation(dp);
return;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4c0c683ce07a..996bd88b75ba 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -255,13 +255,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out;
if (len == 0)
set_bit(CACHE_NEGATIVE, &ent.h.flags);
- else {
- if (error >= IDMAP_NAMESZ) {
- error = -EINVAL;
- goto out;
- }
+ else if (len >= IDMAP_NAMESZ)
+ goto out;
+ else
memcpy(ent.name, buf1, sizeof(ent.name));
- }
error = -ENOMEM;
res = idtoname_update(&ent, res);
if (res == NULL)
@@ -467,20 +464,25 @@ nametoid_update(struct ent *new, struct ent *old)
* Exported API
*/
-void
+int
nfsd_idmap_init(void)
{
- cache_register(&idtoname_cache);
- cache_register(&nametoid_cache);
+ int rv;
+
+ rv = cache_register(&idtoname_cache);
+ if (rv)
+ return rv;
+ rv = cache_register(&nametoid_cache);
+ if (rv)
+ cache_unregister(&idtoname_cache);
+ return rv;
}
void
nfsd_idmap_shutdown(void)
{
- if (cache_unregister(&idtoname_cache))
- printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n");
- if (cache_unregister(&nametoid_cache))
- printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n");
+ cache_unregister(&idtoname_cache);
+ cache_unregister(&nametoid_cache);
}
/*
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 18ead1790bb3..c593db047d8b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -750,7 +750,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_export,
cstate->current_fh.fh_dentry, buf,
&count, verify->ve_bmval,
- rqstp);
+ rqstp, 0);
/* this means that nfsd4_encode_fattr() ran out of space */
if (status == nfserr_resource && count == 0)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 31673cd251c3..f6744bc03dae 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */
static time_t user_lease_time = 90;
static time_t boot_time;
static int in_grace = 1;
-static u32 current_clientid = 1;
static u32 current_ownerid = 1;
static u32 current_fileid = 1;
static u32 current_delegid = 1;
@@ -340,21 +339,20 @@ STALE_CLIENTID(clientid_t *clid)
* This type of memory management is somewhat inefficient, but we use it
* anyway since SETCLIENTID is not a common operation.
*/
-static inline struct nfs4_client *
-alloc_client(struct xdr_netobj name)
+static struct nfs4_client *alloc_client(struct xdr_netobj name)
{
struct nfs4_client *clp;
- if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
- if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) {
- memcpy(clp->cl_name.data, name.data, name.len);
- clp->cl_name.len = name.len;
- }
- else {
- kfree(clp);
- clp = NULL;
- }
+ clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
+ if (clp == NULL)
+ return NULL;
+ clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
+ if (clp->cl_name.data == NULL) {
+ kfree(clp);
+ return NULL;
}
+ memcpy(clp->cl_name.data, name.data, name.len);
+ clp->cl_name.len = name.len;
return clp;
}
@@ -363,8 +361,11 @@ shutdown_callback_client(struct nfs4_client *clp)
{
struct rpc_clnt *clnt = clp->cl_callback.cb_client;
- /* shutdown rpc client, ending any outstanding recall rpcs */
if (clnt) {
+ /*
+ * Callback threads take a reference on the client, so there
+ * should be no outstanding callbacks at this point.
+ */
clp->cl_callback.cb_client = NULL;
rpc_shutdown_client(clnt);
}
@@ -422,12 +423,13 @@ expire_client(struct nfs4_client *clp)
put_nfs4_client(clp);
}
-static struct nfs4_client *
-create_client(struct xdr_netobj name, char *recdir) {
+static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
+{
struct nfs4_client *clp;
- if (!(clp = alloc_client(name)))
- goto out;
+ clp = alloc_client(name);
+ if (clp == NULL)
+ return NULL;
memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
atomic_set(&clp->cl_count, 1);
atomic_set(&clp->cl_callback.cb_set, 0);
@@ -436,32 +438,30 @@ create_client(struct xdr_netobj name, char *recdir) {
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_lru);
-out:
return clp;
}
-static void
-copy_verf(struct nfs4_client *target, nfs4_verifier *source) {
- memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data));
+static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+{
+ memcpy(target->cl_verifier.data, source->data,
+ sizeof(target->cl_verifier.data));
}
-static void
-copy_clid(struct nfs4_client *target, struct nfs4_client *source) {
+static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
+{
target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}
-static void
-copy_cred(struct svc_cred *target, struct svc_cred *source) {
-
+static void copy_cred(struct svc_cred *target, struct svc_cred *source)
+{
target->cr_uid = source->cr_uid;
target->cr_gid = source->cr_gid;
target->cr_group_info = source->cr_group_info;
get_group_info(target->cr_group_info);
}
-static inline int
-same_name(const char *n1, const char *n2)
+static int same_name(const char *n1, const char *n2)
{
return 0 == memcmp(n1, n2, HEXDIR_LEN);
}
@@ -485,26 +485,26 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
return cr1->cr_uid == cr2->cr_uid;
}
-static void
-gen_clid(struct nfs4_client *clp) {
+static void gen_clid(struct nfs4_client *clp)
+{
+ static u32 current_clientid = 1;
+
clp->cl_clientid.cl_boot = boot_time;
clp->cl_clientid.cl_id = current_clientid++;
}
-static void
-gen_confirm(struct nfs4_client *clp) {
- struct timespec tv;
- u32 * p;
+static void gen_confirm(struct nfs4_client *clp)
+{
+ static u32 i;
+ u32 *p;
- tv = CURRENT_TIME;
p = (u32 *)clp->cl_confirm.data;
- *p++ = tv.tv_sec;
- *p++ = tv.tv_nsec;
+ *p++ = get_seconds();
+ *p++ = i++;
}
-static int
-check_name(struct xdr_netobj name) {
-
+static int check_name(struct xdr_netobj name)
+{
if (name.len == 0)
return 0;
if (name.len > NFS4_OPAQUE_LIMIT) {
@@ -683,39 +683,6 @@ out_err:
return;
}
-/*
- * RFC 3010 has a complex implmentation description of processing a
- * SETCLIENTID request consisting of 5 bullets, labeled as
- * CASE0 - CASE4 below.
- *
- * NOTES:
- * callback information will be processed in a future patch
- *
- * an unconfirmed record is added when:
- * NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record.
- * CASE 1: confirmed record found with matching name, principal,
- * verifier, and clientid.
- * CASE 2: confirmed record found with matching name, principal,
- * and there is no unconfirmed record with matching
- * name and principal
- *
- * an unconfirmed record is replaced when:
- * CASE 3: confirmed record found with matching name, principal,
- * and an unconfirmed record is found with matching
- * name, principal, and with clientid and
- * confirm that does not match the confirmed record.
- * CASE 4: there is no confirmed record with matching name and
- * principal. there is an unconfirmed record with
- * matching name, principal.
- *
- * an unconfirmed record is deleted when:
- * CASE 1: an unconfirmed record that matches input name, verifier,
- * and confirmed clientid.
- * CASE 4: any unconfirmed records with matching name and principal
- * that exist after an unconfirmed record has been replaced
- * as described above.
- *
- */
__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid)
@@ -748,11 +715,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state();
conf = find_confirmed_client_by_str(dname, strhashval);
if (conf) {
- /*
- * CASE 0:
- * clname match, confirmed, different principal
- * or different ip_address
- */
+ /* RFC 3530 14.2.33 CASE 0: */
status = nfserr_clid_inuse;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
|| conf->cl_addr != sin->sin_addr.s_addr) {
@@ -761,12 +724,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
}
+ /*
+ * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
+ * has a description of SETCLIENTID request processing consisting
+ * of 5 bullet points, labeled as CASE0 - CASE4 below.
+ */
unconf = find_unconfirmed_client_by_str(dname, strhashval);
status = nfserr_resource;
if (!conf) {
- /*
- * CASE 4:
- * placed first, because it is the normal case.
+ /*
+ * RFC 3530 14.2.33 CASE 4:
+ * placed first, because it is the normal case
*/
if (unconf)
expire_client(unconf);
@@ -776,17 +744,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
gen_clid(new);
} else if (same_verf(&conf->cl_verifier, &clverifier)) {
/*
- * CASE 1:
- * cl_name match, confirmed, principal match
- * verifier match: probable callback update
- *
- * remove any unconfirmed nfs4_client with
- * matching cl_name, cl_verifier, and cl_clientid
- *
- * create and insert an unconfirmed nfs4_client with same
- * cl_name, cl_verifier, and cl_clientid as existing
- * nfs4_client, but with the new callback info and a
- * new cl_confirm
+ * RFC 3530 14.2.33 CASE 1:
+ * probable callback update
*/
if (unconf) {
/* Note this is removing unconfirmed {*x***},
@@ -802,43 +761,25 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf);
} else if (!unconf) {
/*
- * CASE 2:
- * clname match, confirmed, principal match
- * verfier does not match
- * no unconfirmed. create a new unconfirmed nfs4_client
- * using input clverifier, clname, and callback info
- * and generate a new cl_clientid and cl_confirm.
+ * RFC 3530 14.2.33 CASE 2:
+ * probable client reboot; state will be removed if
+ * confirmed.
*/
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
- } else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
- /*
- * CASE3:
- * confirmed found (name, principal match)
- * confirmed verifier does not match input clverifier
- *
- * unconfirmed found (name match)
- * confirmed->cl_confirm != unconfirmed->cl_confirm
- *
- * remove unconfirmed.
- *
- * create an unconfirmed nfs4_client
- * with same cl_name as existing confirmed nfs4_client,
- * but with new callback info, new cl_clientid,
- * new cl_verifier and a new cl_confirm
+ } else {
+ /*
+ * RFC 3530 14.2.33 CASE 3:
+ * probable client reboot; state will be removed if
+ * confirmed.
*/
expire_client(unconf);
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
- } else {
- /* No cases hit !!! */
- status = nfserr_inval;
- goto out;
-
}
copy_verf(new, &clverifier);
new->cl_addr = sin->sin_addr.s_addr;
@@ -857,11 +798,9 @@ out:
/*
- * RFC 3010 has a complex implmentation description of processing a
- * SETCLIENTID_CONFIRM request consisting of 4 bullets describing
- * processing on a DRC miss, labeled as CASE1 - CASE4 below.
- *
- * NOTE: callback information will be processed here in a future patch
+ * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
+ * a description of SETCLIENTID_CONFIRM request processing consisting of 4
+ * bullets, labeled as CASE1 - CASE4 below.
*/
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
@@ -892,16 +831,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
goto out;
- if ((conf && unconf) &&
- (same_verf(&unconf->cl_confirm, &confirm)) &&
- (same_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
- (same_name(conf->cl_recdir,unconf->cl_recdir)) &&
- (!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
- /* CASE 1:
- * unconf record that matches input clientid and input confirm.
- * conf record that matches input clientid.
- * conf and unconf records match names, verifiers
- */
+ /*
+ * section 14.2.34 of RFC 3530 has a description of
+ * SETCLIENTID_CONFIRM request processing consisting
+ * of 4 bullet points, labeled as CASE1 - CASE4 below.
+ */
+ if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
+ /*
+ * RFC 3530 14.2.34 CASE 1:
+ * callback update
+ */
if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
status = nfserr_clid_inuse;
else {
@@ -914,15 +853,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfs_ok;
}
- } else if ((conf && !unconf) ||
- ((conf && unconf) &&
- (!same_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
- !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
- /* CASE 2:
- * conf record that matches input clientid.
- * if unconf record matches input clientid, then
- * unconf->cl_name or unconf->cl_verifier don't match the
- * conf record.
+ } else if (conf && !unconf) {
+ /*
+ * RFC 3530 14.2.34 CASE 2:
+ * probable retransmitted request; play it safe and
+ * do nothing.
*/
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
status = nfserr_clid_inuse;
@@ -930,10 +865,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfs_ok;
} else if (!conf && unconf
&& same_verf(&unconf->cl_confirm, &confirm)) {
- /* CASE 3:
- * conf record not found.
- * unconf record found.
- * unconf->cl_confirm matches input confirm
+ /*
+ * RFC 3530 14.2.34 CASE 3:
+ * Normal case; new or rebooted client:
*/
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
status = nfserr_clid_inuse;
@@ -948,16 +882,15 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
move_to_confirmed(unconf);
conf = unconf;
+ nfsd4_probe_callback(conf);
status = nfs_ok;
}
} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
&& (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
&confirm)))) {
- /* CASE 4:
- * conf record not found, or if conf, conf->cl_confirm does not
- * match input confirm.
- * unconf record not found, or if unconf, unconf->cl_confirm
- * does not match input confirm.
+ /*
+ * RFC 3530 14.2.34 CASE 4:
+ * Client probably hasn't noticed that we rebooted yet.
*/
status = nfserr_stale_clientid;
} else {
@@ -965,8 +898,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfserr_clid_inuse;
}
out:
- if (!status)
- nfsd4_probe_callback(conf);
nfs4_unlock_state();
return status;
}
@@ -1226,14 +1157,19 @@ find_file(struct inode *ino)
return NULL;
}
-static int access_valid(u32 x)
+static inline int access_valid(u32 x)
{
- return (x > 0 && x < 4);
+ if (x < NFS4_SHARE_ACCESS_READ)
+ return 0;
+ if (x > NFS4_SHARE_ACCESS_BOTH)
+ return 0;
+ return 1;
}
-static int deny_valid(u32 x)
+static inline int deny_valid(u32 x)
{
- return (x >= 0 && x < 5);
+ /* Note: unlike access bits, deny bits may be zero. */
+ return x <= NFS4_SHARE_DENY_BOTH;
}
static void
@@ -2162,8 +2098,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
goto check_replay;
}
+ *stpp = stp;
+ *sopp = sop = stp->st_stateowner;
+
if (lock) {
- struct nfs4_stateowner *sop = stp->st_stateowner;
clientid_t *lockclid = &lock->v.new.clientid;
struct nfs4_client *clp = sop->so_client;
int lkflg = 0;
@@ -2193,9 +2131,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
return nfserr_bad_stateid;
}
- *stpp = stp;
- *sopp = sop = stp->st_stateowner;
-
/*
* We now validate the seqid and stateid generation numbers.
* For the moment, we ignore the possibility of
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 57333944af7f..b0592e7c378d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -148,12 +148,12 @@ xdr_error: \
} \
} while (0)
-static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
{
/* We want more bytes than seem to be available.
* Maybe we need a new page, maybe we have just run out
*/
- int avail = (char*)argp->end - (char*)argp->p;
+ unsigned int avail = (char *)argp->end - (char *)argp->p;
__be32 *p;
if (avail + argp->pagelen < nbytes)
return NULL;
@@ -169,6 +169,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
return NULL;
}
+ /*
+ * The following memcpy is safe because read_buf is always
+ * called with nbytes > avail, and the two cases above both
+ * guarantee p points to at least nbytes bytes.
+ */
memcpy(p, argp->p, avail);
/* step to next page */
argp->p = page_address(argp->pagelist[0]);
@@ -1448,7 +1453,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
__be32
nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
- struct svc_rqst *rqstp)
+ struct svc_rqst *rqstp, int ignore_crossmnt)
{
u32 bmval0 = bmval[0];
u32 bmval1 = bmval[1];
@@ -1828,7 +1833,12 @@ out_acl:
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
if ((buflen -= 8) < 0)
goto out_resource;
- if (exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
+ /*
+ * Get parent's attributes if not ignoring crossmount
+ * and this is the root of a cross-mounted filesystem.
+ */
+ if (ignore_crossmnt == 0 &&
+ exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
err = vfs_getattr(exp->ex_mnt->mnt_parent,
exp->ex_mnt->mnt_mountpoint, &stat);
if (err)
@@ -1864,13 +1874,25 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry;
__be32 nfserr;
+ int ignore_crossmnt = 0;
dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
exp_get(exp);
- if (d_mountpoint(dentry)) {
+ /*
+ * In the case of a mountpoint, the client may be asking for
+ * attributes that are only properties of the underlying filesystem
+ * as opposed to the cross-mounted file system. In such a case,
+ * we will not follow the cross mount and will fill the attribtutes
+ * directly from the mountpoint dentry.
+ */
+ if (d_mountpoint(dentry) &&
+ (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 &&
+ (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0)
+ ignore_crossmnt = 1;
+ else if (d_mountpoint(dentry)) {
int err;
/*
@@ -1889,7 +1911,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
}
nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
- cd->rd_rqstp);
+ cd->rd_rqstp, ignore_crossmnt);
out_put:
dput(dentry);
exp_put(exp);
@@ -2043,7 +2065,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
resp->p, &buflen, getattr->ga_bmval,
- resp->rqstp);
+ resp->rqstp, 0);
if (!nfserr)
resp->p += buflen;
return nfserr;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 578f2c9d56be..5bfc2ac60d54 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -44,17 +44,17 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
*/
static DEFINE_SPINLOCK(cache_lock);
-void
-nfsd_cache_init(void)
+int nfsd_reply_cache_init(void)
{
struct svc_cacherep *rp;
int i;
INIT_LIST_HEAD(&lru_head);
i = CACHESIZE;
- while(i) {
+ while (i) {
rp = kmalloc(sizeof(*rp), GFP_KERNEL);
- if (!rp) break;
+ if (!rp)
+ goto out_nomem;
list_add(&rp->c_lru, &lru_head);
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
@@ -62,23 +62,19 @@ nfsd_cache_init(void)
i--;
}
- if (i)
- printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
- CACHESIZE, CACHESIZE-i);
-
hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
- if (!hash_list) {
- nfsd_cache_shutdown();
- printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
- HASHSIZE * sizeof(struct hlist_head));
- return;
- }
+ if (!hash_list)
+ goto out_nomem;
cache_disabled = 0;
+ return 0;
+out_nomem:
+ printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
+ nfsd_reply_cache_shutdown();
+ return -ENOMEM;
}
-void
-nfsd_cache_shutdown(void)
+void nfsd_reply_cache_shutdown(void)
{
struct svc_cacherep *rp;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 77dc9893b7ba..8516137cdbb0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -304,6 +304,9 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
struct auth_domain *dom;
struct knfsd_fh fh;
+ if (size == 0)
+ return -EINVAL;
+
if (buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
@@ -503,7 +506,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
lock_kernel();
if (nfsd_serv)
- len = svc_sock_names(buf, nfsd_serv, NULL);
+ len = svc_xprt_names(nfsd_serv, buf, 0);
unlock_kernel();
return len;
}
@@ -540,7 +543,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
}
return err < 0 ? err : 0;
}
- if (buf[0] == '-') {
+ if (buf[0] == '-' && isdigit(buf[1])) {
char *toclose = kstrdup(buf+1, GFP_KERNEL);
int len = 0;
if (!toclose)
@@ -554,6 +557,53 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
kfree(toclose);
return len;
}
+ /*
+ * Add a transport listener by writing it's transport name
+ */
+ if (isalpha(buf[0])) {
+ int err;
+ char transport[16];
+ int port;
+ if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
+ err = nfsd_create_serv();
+ if (!err) {
+ err = svc_create_xprt(nfsd_serv,
+ transport, port,
+ SVC_SOCK_ANONYMOUS);
+ if (err == -ENOENT)
+ /* Give a reasonable perror msg for
+ * bad transport string */
+ err = -EPROTONOSUPPORT;
+ }
+ return err < 0 ? err : 0;
+ }
+ }
+ /*
+ * Remove a transport by writing it's transport name and port number
+ */
+ if (buf[0] == '-' && isalpha(buf[1])) {
+ struct svc_xprt *xprt;
+ int err = -EINVAL;
+ char transport[16];
+ int port;
+ if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
+ if (port == 0)
+ return -EINVAL;
+ lock_kernel();
+ if (nfsd_serv) {
+ xprt = svc_find_xprt(nfsd_serv, transport,
+ AF_UNSPEC, port);
+ if (xprt) {
+ svc_close_xprt(xprt);
+ svc_xprt_put(xprt);
+ err = 0;
+ } else
+ err = -ENOTCONN;
+ }
+ unlock_kernel();
+ return err < 0 ? err : 0;
+ }
+ }
return -EINVAL;
}
@@ -616,7 +666,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
char *recdir;
int len, status;
- if (size > PATH_MAX || buf[size-1] != '\n')
+ if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
@@ -674,6 +724,27 @@ static struct file_system_type nfsd_fs_type = {
.kill_sb = kill_litter_super,
};
+#ifdef CONFIG_PROC_FS
+static int create_proc_exports_entry(void)
+{
+ struct proc_dir_entry *entry;
+
+ entry = proc_mkdir("fs/nfs", NULL);
+ if (!entry)
+ return -ENOMEM;
+ entry = create_proc_entry("fs/nfs/exports", 0, NULL);
+ if (!entry)
+ return -ENOMEM;
+ entry->proc_fops = &exports_operations;
+ return 0;
+}
+#else /* CONFIG_PROC_FS */
+static int create_proc_exports_entry(void)
+{
+ return 0;
+}
+#endif
+
static int __init init_nfsd(void)
{
int retval;
@@ -683,32 +754,43 @@ static int __init init_nfsd(void)
if (retval)
return retval;
nfsd_stat_init(); /* Statistics */
- nfsd_cache_init(); /* RPC reply cache */
- nfsd_export_init(); /* Exports table */
+ retval = nfsd_reply_cache_init();
+ if (retval)
+ goto out_free_stat;
+ retval = nfsd_export_init();
+ if (retval)
+ goto out_free_cache;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
- nfsd_idmap_init(); /* Name to ID mapping */
- if (proc_mkdir("fs/nfs", NULL)) {
- struct proc_dir_entry *entry;
- entry = create_proc_entry("fs/nfs/exports", 0, NULL);
- if (entry)
- entry->proc_fops = &exports_operations;
- }
+ retval = nfsd_idmap_init();
+ if (retval)
+ goto out_free_lockd;
+ retval = create_proc_exports_entry();
+ if (retval)
+ goto out_free_idmap;
retval = register_filesystem(&nfsd_fs_type);
- if (retval) {
- nfsd_export_shutdown();
- nfsd_cache_shutdown();
- remove_proc_entry("fs/nfs/exports", NULL);
- remove_proc_entry("fs/nfs", NULL);
- nfsd_stat_shutdown();
- nfsd_lockd_shutdown();
- }
+ if (retval)
+ goto out_free_all;
+ return 0;
+out_free_all:
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+out_free_idmap:
+ nfsd_idmap_shutdown();
+out_free_lockd:
+ nfsd_lockd_shutdown();
+ nfsd_export_shutdown();
+out_free_cache:
+ nfsd_reply_cache_shutdown();
+out_free_stat:
+ nfsd_stat_shutdown();
+ nfsd4_free_slabs();
return retval;
}
static void __exit exit_nfsd(void)
{
nfsd_export_shutdown();
- nfsd_cache_shutdown();
+ nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 468f17a78441..8fbd2dc08a92 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -22,6 +22,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/nfsd/nfsd.h>
+#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_FH
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1190aeaa92be..9647b0f7bc0c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -155,8 +155,8 @@ static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
- struct svc_sock *svsk;
- list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+ struct svc_xprt *xprt;
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
lockd_down();
nfsd_serv = NULL;
nfsd_racache_shutdown();
@@ -236,7 +236,7 @@ static int nfsd_init_socks(int port)
error = lockd_up(IPPROTO_UDP);
if (error >= 0) {
- error = svc_makesock(nfsd_serv, IPPROTO_UDP, port,
+ error = svc_create_xprt(nfsd_serv, "udp", port,
SVC_SOCK_DEFAULTS);
if (error < 0)
lockd_down();
@@ -247,7 +247,7 @@ static int nfsd_init_socks(int port)
#ifdef CONFIG_NFSD_TCP
error = lockd_up(IPPROTO_TCP);
if (error >= 0) {
- error = svc_makesock(nfsd_serv, IPPROTO_TCP, port,
+ error = svc_create_xprt(nfsd_serv, "tcp", port,
SVC_SOCK_DEFAULTS);
if (error < 0)
lockd_down();
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b86e3658a0af..61ad61743d94 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -15,6 +15,7 @@
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr.h>
#include <linux/mm.h>
+#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
@@ -62,10 +63,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
* no slashes or null bytes.
*/
static __be32 *
-decode_filename(__be32 *p, char **namp, int *lenp)
+decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{
char *name;
- int i;
+ unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) {
@@ -78,10 +79,10 @@ decode_filename(__be32 *p, char **namp, int *lenp)
}
static __be32 *
-decode_pathname(__be32 *p, char **namp, int *lenp)
+decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
{
char *name;
- int i;
+ unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d0199189924c..cc75e4fcd02b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -132,7 +132,7 @@ out:
__be32
nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
- const char *name, int len,
+ const char *name, unsigned int len,
struct svc_export **exp_ret, struct dentry **dentry_ret)
{
struct svc_export *exp;
@@ -226,7 +226,7 @@ out_nfserr:
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
- int len, struct svc_fh *resfh)
+ unsigned int len, struct svc_fh *resfh)
{
struct svc_export *exp;
struct dentry *dentry;
@@ -1151,6 +1151,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif /* CONFIG_NFSD_V3 */
+__be32
+nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
+ struct iattr *iap)
+{
+ /*
+ * Mode has already been set earlier in create:
+ */
+ iap->ia_valid &= ~ATTR_MODE;
+ /*
+ * Setting uid/gid works only for root. Irix appears to
+ * send along the gid on create when it tries to implement
+ * setgid directories via NFS:
+ */
+ if (current->fsuid != 0)
+ iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
+ if (iap->ia_valid)
+ return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ return 0;
+}
+
/*
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
@@ -1167,6 +1187,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
+ __be32 err2;
int host_err;
err = nfserr_perm;
@@ -1257,16 +1278,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
- /* Set file attributes. Mode has already been set and
- * setting uid/gid works only for root. Irix appears to
- * send along the gid when it tries to implement setgid
- * directories via NFS.
- */
- if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
- __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
- if (err2)
- err = err2;
- }
+ err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+ if (err2)
+ err = err2;
/*
* Update the file handle to get the new inode info.
*/
@@ -1295,6 +1309,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
+ __be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@@ -1399,16 +1414,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_atime.tv_nsec = 0;
}
- /* Set file attributes.
- * Irix appears to send along the gid when it tries to
- * implement setgid directories via NFS. Clear out all that cruft.
- */
set_attr:
- if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
- __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
- if (err2)
- err = err2;
- }
+ err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+ if (err2)
+ err = err2;
/*
* Update the filehandle to get the new inode info.
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index ad87cb01299b..00e9ccde8e42 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -87,13 +87,17 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
/* Check for the current buffer head overflowing. */
if (unlikely(file_ofs + bh->b_size > init_size)) {
int ofs;
+ void *kaddr;
ofs = 0;
if (file_ofs < init_size)
ofs = init_size - file_ofs;
local_irq_save(flags);
- zero_user_page(page, bh_offset(bh) + ofs,
- bh->b_size - ofs, KM_BIO_SRC_IRQ);
+ kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+ memset(kaddr + bh_offset(bh) + ofs, 0,
+ bh->b_size - ofs);
+ flush_dcache_page(page);
+ kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
local_irq_restore(flags);
}
} else {
@@ -334,7 +338,7 @@ handle_hole:
bh->b_blocknr = -1UL;
clear_buffer_mapped(bh);
handle_zblock:
- zero_user_page(page, i * blocksize, blocksize, KM_USER0);
+ zero_user(page, i * blocksize, blocksize);
if (likely(!err))
set_buffer_uptodate(bh);
} while (i++, iblock++, (bh = bh->b_this_page) != head);
@@ -410,7 +414,7 @@ retry_readpage:
/* Is the page fully outside i_size? (truncate in progress) */
if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT)) {
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
ntfs_debug("Read outside i_size - truncated?");
goto done;
}
@@ -459,7 +463,7 @@ retry_readpage:
* ok to ignore the compressed flag here.
*/
if (unlikely(page->index > 0)) {
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
goto done;
}
if (!NInoAttr(ni))
@@ -788,8 +792,7 @@ lock_retry_remap:
if (err == -ENOENT || lcn == LCN_ENOENT) {
bh->b_blocknr = -1;
clear_buffer_dirty(bh);
- zero_user_page(page, bh_offset(bh), blocksize,
- KM_USER0);
+ zero_user(page, bh_offset(bh), blocksize);
set_buffer_uptodate(bh);
err = 0;
continue;
@@ -1414,8 +1417,7 @@ retry_writepage:
if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
/* The page straddles i_size. */
unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
- zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs,
- KM_USER0);
+ zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index d1619d05eb23..33ff314cc507 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -565,7 +565,7 @@ int ntfs_read_compressed_block(struct page *page)
if (xpage >= max_page) {
kfree(bhs);
kfree(pages);
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
ntfs_debug("Compressed read outside i_size - truncated?");
SetPageUptodate(page);
unlock_page(page);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 6cd08dfdc2ed..3c5550cd11d6 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -607,8 +607,8 @@ do_next_page:
ntfs_submit_bh_for_read(bh);
*wait_bh++ = bh;
} else {
- zero_user_page(page, bh_offset(bh),
- blocksize, KM_USER0);
+ zero_user(page, bh_offset(bh),
+ blocksize);
set_buffer_uptodate(bh);
}
}
@@ -683,9 +683,8 @@ map_buffer_cached:
ntfs_submit_bh_for_read(bh);
*wait_bh++ = bh;
} else {
- zero_user_page(page,
- bh_offset(bh),
- blocksize, KM_USER0);
+ zero_user(page, bh_offset(bh),
+ blocksize);
set_buffer_uptodate(bh);
}
}
@@ -703,8 +702,8 @@ map_buffer_cached:
*/
if (bh_end <= pos || bh_pos >= end) {
if (!buffer_uptodate(bh)) {
- zero_user_page(page, bh_offset(bh),
- blocksize, KM_USER0);
+ zero_user(page, bh_offset(bh),
+ blocksize);
set_buffer_uptodate(bh);
}
mark_buffer_dirty(bh);
@@ -743,8 +742,7 @@ map_buffer_cached:
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh)) {
- zero_user_page(page, bh_offset(bh), blocksize,
- KM_USER0);
+ zero_user(page, bh_offset(bh), blocksize);
set_buffer_uptodate(bh);
}
continue;
@@ -868,8 +866,8 @@ rl_not_mapped_enoent:
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh)) {
- zero_user_page(page, bh_offset(bh),
- blocksize, KM_USER0);
+ zero_user(page, bh_offset(bh),
+ blocksize);
set_buffer_uptodate(bh);
}
continue;
@@ -1128,8 +1126,8 @@ rl_not_mapped_enoent:
if (likely(bh_pos < initialized_size))
ofs = initialized_size - bh_pos;
- zero_user_page(page, bh_offset(bh) + ofs,
- blocksize - ofs, KM_USER0);
+ zero_user_segment(page, bh_offset(bh) + ofs,
+ blocksize);
}
} else /* if (unlikely(!buffer_uptodate(bh))) */
err = -EIO;
@@ -1269,8 +1267,8 @@ rl_not_mapped_enoent:
if (PageUptodate(page))
set_buffer_uptodate(bh);
else {
- zero_user_page(page, bh_offset(bh),
- blocksize, KM_USER0);
+ zero_user(page, bh_offset(bh),
+ blocksize);
set_buffer_uptodate(bh);
}
}
@@ -1330,7 +1328,7 @@ err_out:
len = PAGE_CACHE_SIZE;
if (len > bytes)
len = bytes;
- zero_user_page(*pages, 0, len, KM_USER0);
+ zero_user(*pages, 0, len);
}
goto out;
}
@@ -1451,7 +1449,7 @@ err_out:
len = PAGE_CACHE_SIZE;
if (len > bytes)
len = bytes;
- zero_user_page(*pages, 0, len, KM_USER0);
+ zero_user(*pages, 0, len);
}
goto out;
}
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index e38e402e4103..cd0be3f5c3cd 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -85,8 +85,7 @@ static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
static inline void ntfs_free(void *addr)
{
- if (likely(((unsigned long)addr < VMALLOC_START) ||
- ((unsigned long)addr >= VMALLOC_END ))) {
+ if (!is_vmalloc_addr(addr)) {
kfree(addr);
/* free_page((unsigned long)addr); */
return;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e6df06ac6405..447206eb5c2e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3338,7 +3338,7 @@ static int ocfs2_insert_path(struct inode *inode,
if (insert->ins_split != SPLIT_NONE) {
/*
* We could call ocfs2_insert_at_leaf() for some types
- * of splits, but it's easier to just let one seperate
+ * of splits, but it's easier to just let one separate
* function sort it all out.
*/
ocfs2_split_record(inode, left_path, right_path,
@@ -5670,7 +5670,7 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
mlog_errno(ret);
if (zero)
- zero_user_page(page, from, to - from, KM_USER0);
+ zero_user_segment(page, from, to);
/*
* Need to set the buffers we zero'd into uptodate
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index bc7b4cbbe8ec..82243127eebf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -307,7 +307,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
* XXX sys_readahead() seems to get that wrong?
*/
if (start >= i_size_read(inode)) {
- zero_user_page(page, 0, PAGE_SIZE, KM_USER0);
+ zero_user(page, 0, PAGE_SIZE);
SetPageUptodate(page);
ret = 0;
goto out_alloc;
@@ -869,7 +869,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
if (block_start >= to)
break;
- zero_user_page(page, block_start, bh->b_size, KM_USER0);
+ zero_user(page, block_start, bh->b_size);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
@@ -1034,7 +1034,7 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to
start = max(from, block_start);
end = min(to, block_end);
- zero_user_page(page, start, end - start, KM_USER0);
+ zero_user_segment(page, start, end);
set_buffer_uptodate(bh);
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 6b0107f21344..e280833ceb9a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1215,7 +1215,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
down_write(&oi->ip_alloc_sem);
/*
- * Prepare for worst case allocation scenario of two seperate
+ * Prepare for worst case allocation scenario of two separate
* extents.
*/
if (alloc == 2)
diff --git a/fs/ocfs2/ocfs1_fs_compat.h b/fs/ocfs2/ocfs1_fs_compat.h
index 0b499bccec5a..dfb313bda5dd 100644
--- a/fs/ocfs2/ocfs1_fs_compat.h
+++ b/fs/ocfs2/ocfs1_fs_compat.h
@@ -77,7 +77,7 @@ struct ocfs1_disk_lock
{
/*00*/ __u32 curr_master;
__u8 file_lock;
- __u8 compat_pad[3]; /* Not in orignal definition. Used to
+ __u8 compat_pad[3]; /* Not in original definition. Used to
make the already existing alignment
explicit */
__u64 last_write_time;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 7e397e2c25dd..72c198a004df 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -646,7 +646,7 @@ bail:
* sync-data inodes."
*
* Note: OCFS2 already does this differently for metadata vs data
- * allocations, as those bitmaps are seperate and undo access is never
+ * allocations, as those bitmaps are separate and undo access is never
* called on a metadata group descriptor.
*/
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
diff --git a/fs/proc/array.c b/fs/proc/array.c
index eb97f2897e2b..6ba2746e4517 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -141,12 +141,7 @@ static const char *task_state_array[] = {
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & (TASK_RUNNING |
- TASK_INTERRUPTIBLE |
- TASK_UNINTERRUPTIBLE |
- TASK_STOPPED |
- TASK_TRACED)) |
- tsk->exit_state;
+ unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
const char **p = &task_state_array[0];
while (state) {
@@ -286,14 +281,23 @@ static inline char *task_sig(struct task_struct *p, char *buffer)
return buffer;
}
+static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer)
+{
+ unsigned __capi;
+
+ buffer += sprintf(buffer, "%s", header);
+ CAP_FOR_EACH_U32(__capi) {
+ buffer += sprintf(buffer, "%08x",
+ a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+ }
+ return buffer + sprintf(buffer, "\n");
+}
+
static inline char *task_cap(struct task_struct *p, char *buffer)
{
- return buffer + sprintf(buffer, "CapInh:\t%016x\n"
- "CapPrm:\t%016x\n"
- "CapEff:\t%016x\n",
- cap_t(p->cap_inheritable),
- cap_t(p->cap_permitted),
- cap_t(p->cap_effective));
+ buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer);
+ buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer);
+ return render_cap_t("CapEff:\t", &p->cap_effective, buffer);
}
static inline char *task_context_switch_counts(struct task_struct *p,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 91fa8e6ce8ad..c59852b38787 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -88,10 +88,6 @@
* in /proc for a task before it execs a suid executable.
*/
-
-/* Worst case buffer size needed for holding an integer. */
-#define PROC_NUMBUF 13
-
struct pid_entry {
char *name;
int len;
@@ -199,7 +195,7 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
(task == current || \
(task->parent == current && \
(task->ptrace & PT_PTRACED) && \
- (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
+ (task_is_stopped_or_traced(task)) && \
security_ptrace(current,task) == 0))
struct mm_struct *mm_for_maps(struct task_struct *task)
@@ -787,7 +783,7 @@ out_no_task:
}
#endif
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
switch (orig) {
case 0:
@@ -935,42 +931,6 @@ static const struct file_operations proc_oom_adjust_operations = {
.write = oom_adjust_write,
};
-#ifdef CONFIG_MMU
-static ssize_t clear_refs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task;
- char buffer[PROC_NUMBUF], *end;
- struct mm_struct *mm;
-
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count))
- return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
- return -EINVAL;
- if (*end == '\n')
- end++;
- task = get_proc_task(file->f_path.dentry->d_inode);
- if (!task)
- return -ESRCH;
- mm = get_task_mm(task);
- if (mm) {
- clear_refs_smap(mm);
- mmput(mm);
- }
- put_task_struct(task);
- if (end - buffer == 0)
- return -EIO;
- return end - buffer;
-}
-
-static struct file_operations proc_clear_refs_operations = {
- .write = clear_refs_write,
-};
-#endif
-
#ifdef CONFIG_AUDITSYSCALL
#define TMPBUFLEN 21
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -984,7 +944,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
if (!task)
return -ESRCH;
length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
- audit_get_loginuid(task->audit_context));
+ audit_get_loginuid(task));
put_task_struct(task);
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
@@ -2289,9 +2249,10 @@ static const struct pid_entry tgid_base_stuff[] = {
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
REG("mountstats", S_IRUSR, mountstats),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
+ REG("pagemap", S_IRUSR, pagemap),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
@@ -2360,7 +2321,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
name.len = snprintf(buf, sizeof(buf), "%d", pid);
dentry = d_hash_and_lookup(mnt->mnt_root, &name);
if (dentry) {
- shrink_dcache_parent(dentry);
+ if (!(current->flags & PF_EXITING))
+ shrink_dcache_parent(dentry);
d_drop(dentry);
dput(dentry);
}
@@ -2617,9 +2579,10 @@ static const struct pid_entry tid_base_stuff[] = {
LNK("root", root),
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
+ REG("pagemap", S_IRUSR, pagemap),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 05b3e9006262..7d57e8069924 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,15 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *);
extern int proc_tgid_stat(struct task_struct *, char *);
extern int proc_pid_status(struct task_struct *, char *);
extern int proc_pid_statm(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
extern const struct file_operations proc_maps_operations;
extern const struct file_operations proc_numa_maps_operations;
extern const struct file_operations proc_smaps_operations;
-
-extern const struct file_operations proc_maps_operations;
-extern const struct file_operations proc_numa_maps_operations;
-extern const struct file_operations proc_smaps_operations;
-
+extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
void free_proc_entry(struct proc_dir_entry *de);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1be73082edd3..7dd26e18cbfd 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -325,7 +325,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (m == NULL) {
if (clear_user(buffer, tsz))
return -EFAULT;
- } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
+ } else if (is_vmalloc_addr((void *)start)) {
char * elf_buf;
struct vm_struct *m;
unsigned long curstart = start;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 3462bfde89f6..51288db37a0c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -46,6 +46,7 @@
#include <linux/vmalloc.h>
#include <linux/crash_dump.h>
#include <linux/pid_namespace.h>
+#include <linux/bootmem.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -675,6 +676,137 @@ static const struct file_operations proc_sysrq_trigger_operations = {
};
#endif
+#ifdef CONFIG_PROC_PAGE_MONITOR
+#define KPMSIZE sizeof(u64)
+#define KPMMASK (KPMSIZE - 1)
+/* /proc/kpagecount - an array exposing page counts
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page count.
+ */
+static ssize_t kpagecount_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 __user *out = (u64 __user *)buf;
+ struct page *ppage;
+ unsigned long src = *ppos;
+ unsigned long pfn;
+ ssize_t ret = 0;
+ u64 pcount;
+
+ pfn = src / KPMSIZE;
+ count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
+ if (src & KPMMASK || count & KPMMASK)
+ return -EIO;
+
+ while (count > 0) {
+ ppage = NULL;
+ if (pfn_valid(pfn))
+ ppage = pfn_to_page(pfn);
+ pfn++;
+ if (!ppage)
+ pcount = 0;
+ else
+ pcount = atomic_read(&ppage->_count);
+
+ if (put_user(pcount, out++)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ count -= KPMSIZE;
+ }
+
+ *ppos += (char __user *)out - buf;
+ if (!ret)
+ ret = (char __user *)out - buf;
+ return ret;
+}
+
+static struct file_operations proc_kpagecount_operations = {
+ .llseek = mem_lseek,
+ .read = kpagecount_read,
+};
+
+/* /proc/kpageflags - an array exposing page flags
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page flags.
+ */
+
+/* These macros are used to decouple internal flags from exported ones */
+
+#define KPF_LOCKED 0
+#define KPF_ERROR 1
+#define KPF_REFERENCED 2
+#define KPF_UPTODATE 3
+#define KPF_DIRTY 4
+#define KPF_LRU 5
+#define KPF_ACTIVE 6
+#define KPF_SLAB 7
+#define KPF_WRITEBACK 8
+#define KPF_RECLAIM 9
+#define KPF_BUDDY 10
+
+#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos)
+
+static ssize_t kpageflags_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 __user *out = (u64 __user *)buf;
+ struct page *ppage;
+ unsigned long src = *ppos;
+ unsigned long pfn;
+ ssize_t ret = 0;
+ u64 kflags, uflags;
+
+ pfn = src / KPMSIZE;
+ count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
+ if (src & KPMMASK || count & KPMMASK)
+ return -EIO;
+
+ while (count > 0) {
+ ppage = NULL;
+ if (pfn_valid(pfn))
+ ppage = pfn_to_page(pfn);
+ pfn++;
+ if (!ppage)
+ kflags = 0;
+ else
+ kflags = ppage->flags;
+
+ uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) |
+ kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
+ kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
+ kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
+ kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
+ kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
+ kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
+ kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
+ kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
+ kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
+ kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
+
+ if (put_user(uflags, out++)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ count -= KPMSIZE;
+ }
+
+ *ppos += (char __user *)out - buf;
+ if (!ret)
+ ret = (char __user *)out - buf;
+ return ret;
+}
+
+static struct file_operations proc_kpageflags_operations = {
+ .llseek = mem_lseek,
+ .read = kpageflags_read,
+};
+#endif /* CONFIG_PROC_PAGE_MONITOR */
+
struct proc_dir_entry *proc_root_kcore;
void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
@@ -755,6 +887,10 @@ void __init proc_misc_init(void)
(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
}
#endif
+#ifdef CONFIG_PROC_PAGE_MONITOR
+ create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations);
+ create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations);
+#endif
#ifdef CONFIG_PROC_VMCORE
proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
if (proc_vmcore)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8043a3eab52c..38338ed98cc6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,7 +5,10 @@
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/pagemap.h>
+#include <linux/ptrace.h>
#include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len)
seq_printf(m, "%*c", len, ' ');
}
-struct mem_size_stats
+static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
- unsigned long resident;
- unsigned long shared_clean;
- unsigned long shared_dirty;
- unsigned long private_clean;
- unsigned long private_dirty;
- unsigned long referenced;
-};
+ if (vma && vma != priv->tail_vma) {
+ struct mm_struct *mm = vma->vm_mm;
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
+}
-struct pmd_walker {
- struct vm_area_struct *vma;
- void *private;
- void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
- unsigned long, void *);
-};
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_maps_private *priv = m->private;
+ unsigned long last_addr = m->version;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma, *tail_vma = NULL;
+ loff_t l = *pos;
+
+ /* Clear the per syscall fields in priv */
+ priv->task = NULL;
+ priv->tail_vma = NULL;
+
+ /*
+ * We remember last_addr rather than next_addr to hit with
+ * mmap_cache most of the time. We have zero last_addr at
+ * the beginning and also after lseek. We will have -1 last_addr
+ * after the end of the vmas.
+ */
+
+ if (last_addr == -1UL)
+ return NULL;
+
+ priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+ if (!priv->task)
+ return NULL;
+
+ mm = mm_for_maps(priv->task);
+ if (!mm)
+ return NULL;
+
+ tail_vma = get_gate_vma(priv->task);
+ priv->tail_vma = tail_vma;
+
+ /* Start with last addr hint */
+ vma = find_vma(mm, last_addr);
+ if (last_addr && vma) {
+ vma = vma->vm_next;
+ goto out;
+ }
+
+ /*
+ * Check the vma index is within the range and do
+ * sequential scan until m_index.
+ */
+ vma = NULL;
+ if ((unsigned long)l < mm->map_count) {
+ vma = mm->mmap;
+ while (l-- && vma)
+ vma = vma->vm_next;
+ goto out;
+ }
+
+ if (l != mm->map_count)
+ tail_vma = NULL; /* After gate vma */
+
+out:
+ if (vma)
+ return vma;
+
+ /* End of vmas has been reached */
+ m->version = (tail_vma != NULL)? 0: -1UL;
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return tail_vma;
+}
-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
+static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct proc_maps_private *priv = m->private;
+ struct vm_area_struct *vma = v;
+ struct vm_area_struct *tail_vma = priv->tail_vma;
+
+ (*pos)++;
+ if (vma && (vma != tail_vma) && vma->vm_next)
+ return vma->vm_next;
+ vma_stop(priv, vma);
+ return (vma != tail_vma)? tail_vma: NULL;
+}
+
+static void m_stop(struct seq_file *m, void *v)
+{
+ struct proc_maps_private *priv = m->private;
+ struct vm_area_struct *vma = v;
+
+ vma_stop(priv, vma);
+ if (priv->task)
+ put_task_struct(priv->task);
+}
+
+static int do_maps_open(struct inode *inode, struct file *file,
+ struct seq_operations *ops)
+{
+ struct proc_maps_private *priv;
+ int ret = -ENOMEM;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv) {
+ priv->pid = proc_pid(inode);
+ ret = seq_open(file, ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = priv;
+ } else {
+ kfree(priv);
+ }
+ }
+ return ret;
+}
+
+static int show_map(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
struct task_struct *task = priv->task;
@@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
}
seq_putc(m, '\n');
- if (mss)
- seq_printf(m,
- "Size: %8lu kB\n"
- "Rss: %8lu kB\n"
- "Shared_Clean: %8lu kB\n"
- "Shared_Dirty: %8lu kB\n"
- "Private_Clean: %8lu kB\n"
- "Private_Dirty: %8lu kB\n"
- "Referenced: %8lu kB\n",
- (vma->vm_end - vma->vm_start) >> 10,
- mss->resident >> 10,
- mss->shared_clean >> 10,
- mss->shared_dirty >> 10,
- mss->private_clean >> 10,
- mss->private_dirty >> 10,
- mss->referenced >> 10);
-
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
return 0;
}
-static int show_map(struct seq_file *m, void *v)
+static struct seq_operations proc_pid_maps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_map
+};
+
+static int maps_open(struct inode *inode, struct file *file)
{
- return show_map_internal(m, v, NULL);
+ return do_maps_open(inode, file, &proc_pid_maps_op);
}
-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- void *private)
+const struct file_operations proc_maps_operations = {
+ .open = maps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*
+ * Proportional Set Size(PSS): my share of RSS.
+ *
+ * PSS of a process is the count of pages it has in memory, where each
+ * page is divided by the number of processes sharing it. So if a
+ * process has 1000 pages all to itself, and 1000 shared with one other
+ * process, its PSS will be 1500.
+ *
+ * To keep (accumulated) division errors low, we adopt a 64bit
+ * fixed-point pss counter to minimize division errors. So (pss >>
+ * PSS_SHIFT) would be the real byte count.
+ *
+ * A shift of 12 before division means (assuming 4K page size):
+ * - 1M 3-user-pages add up to 8KB errors;
+ * - supports mapcount up to 2^24, or 16M;
+ * - supports PSS up to 2^52 bytes, or 4PB.
+ */
+#define PSS_SHIFT 12
+
+#ifdef CONFIG_PROC_PAGE_MONITOR
+struct mem_size_stats
+{
+ struct vm_area_struct *vma;
+ unsigned long resident;
+ unsigned long shared_clean;
+ unsigned long shared_dirty;
+ unsigned long private_clean;
+ unsigned long private_dirty;
+ unsigned long referenced;
+ u64 pss;
+};
+
+static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ void *private)
{
struct mem_size_stats *mss = private;
+ struct vm_area_struct *vma = mss->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
+ int mapcount;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
/* Accumulate the size in pages that have been accessed. */
if (pte_young(ptent) || PageReferenced(page))
mss->referenced += PAGE_SIZE;
- if (page_mapcount(page) >= 2) {
+ mapcount = page_mapcount(page);
+ if (mapcount >= 2) {
if (pte_dirty(ptent))
mss->shared_dirty += PAGE_SIZE;
else
mss->shared_clean += PAGE_SIZE;
+ mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
} else {
if (pte_dirty(ptent))
mss->private_dirty += PAGE_SIZE;
else
mss->private_clean += PAGE_SIZE;
+ mss->pss += (PAGE_SIZE << PSS_SHIFT);
}
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
+ return 0;
}
-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- void *private)
+static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
+
+static int show_smap(struct seq_file *m, void *v)
{
+ struct vm_area_struct *vma = v;
+ struct mem_size_stats mss;
+ int ret;
+
+ memset(&mss, 0, sizeof mss);
+ mss.vma = vma;
+ if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+ walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
+ &smaps_walk, &mss);
+
+ ret = show_map(m, v);
+ if (ret)
+ return ret;
+
+ seq_printf(m,
+ "Size: %8lu kB\n"
+ "Rss: %8lu kB\n"
+ "Pss: %8lu kB\n"
+ "Shared_Clean: %8lu kB\n"
+ "Shared_Dirty: %8lu kB\n"
+ "Private_Clean: %8lu kB\n"
+ "Private_Dirty: %8lu kB\n"
+ "Referenced: %8lu kB\n",
+ (vma->vm_end - vma->vm_start) >> 10,
+ mss.resident >> 10,
+ (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
+ mss.shared_clean >> 10,
+ mss.shared_dirty >> 10,
+ mss.private_clean >> 10,
+ mss.private_dirty >> 10,
+ mss.referenced >> 10);
+
+ return ret;
+}
+
+static struct seq_operations proc_pid_smaps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_smap
+};
+
+static int smaps_open(struct inode *inode, struct file *file)
+{
+ return do_maps_open(inode, file, &proc_pid_smaps_op);
+}
+
+const struct file_operations proc_smaps_operations = {
+ .open = smaps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, void *private)
+{
+ struct vm_area_struct *vma = private;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
@@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
+ return 0;
}
-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
- unsigned long addr, unsigned long end)
+static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
{
- pmd_t *pmd;
- unsigned long next;
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF], *end;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
- for (pmd = pmd_offset(pud, addr); addr != end;
- pmd++, addr = next) {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- walker->action(walker->vma, pmd, addr, next, walker->private);
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+ if (!simple_strtol(buffer, &end, 0))
+ return -EINVAL;
+ if (*end == '\n')
+ end++;
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+ mm = get_task_mm(task);
+ if (mm) {
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ if (!is_vm_hugetlb_page(vma))
+ walk_page_range(mm, vma->vm_start, vma->vm_end,
+ &clear_refs_walk, vma);
+ flush_tlb_mm(mm);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
}
+ put_task_struct(task);
+ if (end - buffer == 0)
+ return -EIO;
+ return end - buffer;
}
-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
- unsigned long addr, unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
+const struct file_operations proc_clear_refs_operations = {
+ .write = clear_refs_write,
+};
- for (pud = pud_offset(pgd, addr); addr != end;
- pud++, addr = next) {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- walk_pmd_range(walker, pud, addr, next);
+struct pagemapread {
+ char __user *out, *end;
+};
+
+#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_RESERVED_BITS 3
+#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS)
+#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
+#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
+#define PM_NOT_PRESENT PM_SPECIAL(1LL)
+#define PM_SWAP PM_SPECIAL(2LL)
+#define PM_END_OF_BUFFER 1
+
+static int add_to_pagemap(unsigned long addr, u64 pfn,
+ struct pagemapread *pm)
+{
+ /*
+ * Make sure there's room in the buffer for an
+ * entire entry. Otherwise, only copy part of
+ * the pfn.
+ */
+ if (pm->out + PM_ENTRY_BYTES >= pm->end) {
+ if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
+ return -EFAULT;
+ pm->out = pm->end;
+ return PM_END_OF_BUFFER;
}
+
+ if (put_user(pfn, pm->out))
+ return -EFAULT;
+ pm->out += PM_ENTRY_BYTES;
+ return 0;
}
-/*
- * walk_page_range - walk the page tables of a VMA with a callback
- * @vma - VMA to walk
- * @action - callback invoked for every bottom-level (PTE) page table
- * @private - private data passed to the callback function
- *
- * Recursively walk the page table for the memory area in a VMA, calling
- * a callback for every bottom-level (PTE) page table.
- */
-static inline void walk_page_range(struct vm_area_struct *vma,
- void (*action)(struct vm_area_struct *,
- pmd_t *, unsigned long,
- unsigned long, void *),
- void *private)
+static int pagemap_pte_hole(unsigned long start, unsigned long end,
+ void *private)
{
- unsigned long addr = vma->vm_start;
- unsigned long end = vma->vm_end;
- struct pmd_walker walker = {
- .vma = vma,
- .private = private,
- .action = action,
- };
- pgd_t *pgd;
- unsigned long next;
-
- for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
- pgd++, addr = next) {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- walk_pud_range(&walker, pgd, addr, next);
+ struct pagemapread *pm = private;
+ unsigned long addr;
+ int err = 0;
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+ if (err)
+ break;
}
+ return err;
}
-static int show_smap(struct seq_file *m, void *v)
+u64 swap_pte_to_pagemap_entry(pte_t pte)
{
- struct vm_area_struct *vma = v;
- struct mem_size_stats mss;
-
- memset(&mss, 0, sizeof mss);
- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma, smaps_pte_range, &mss);
- return show_map_internal(m, v, &mss);
+ swp_entry_t e = pte_to_swp_entry(pte);
+ return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
}
-void clear_refs_smap(struct mm_struct *mm)
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ void *private)
{
- struct vm_area_struct *vma;
+ struct pagemapread *pm = private;
+ pte_t *pte;
+ int err = 0;
+
+ for (; addr != end; addr += PAGE_SIZE) {
+ u64 pfn = PM_NOT_PRESENT;
+ pte = pte_offset_map(pmd, addr);
+ if (is_swap_pte(*pte))
+ pfn = swap_pte_to_pagemap_entry(*pte);
+ else if (pte_present(*pte))
+ pfn = pte_pfn(*pte);
+ /* unmap so we're not in atomic when we copy to userspace */
+ pte_unmap(pte);
+ err = add_to_pagemap(addr, pfn, pm);
+ if (err)
+ return err;
+ }
- down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma, clear_refs_pte_range, NULL);
- flush_tlb_mm(mm);
- up_read(&mm->mmap_sem);
+ cond_resched();
+
+ return err;
}
-static void *m_start(struct seq_file *m, loff_t *pos)
+static struct mm_walk pagemap_walk = {
+ .pmd_entry = pagemap_pte_range,
+ .pte_hole = pagemap_pte_hole
+};
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one 64-bit
+ * entry representing the corresponding physical page frame number
+ * (PFN) if the page is present. If there is a swap entry for the
+ * physical page, then an encoding of the swap file number and the
+ * page's offset into the swap file are returned. If no page is
+ * present at all, PM_NOT_PRESENT is returned. This allows determining
+ * precisely which pages are mapped (or in swap) and comparing mapped
+ * pages between processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
{
- struct proc_maps_private *priv = m->private;
- unsigned long last_addr = m->version;
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ struct page **pages, *page;
+ unsigned long uaddr, uend;
struct mm_struct *mm;
- struct vm_area_struct *vma, *tail_vma = NULL;
- loff_t l = *pos;
-
- /* Clear the per syscall fields in priv */
- priv->task = NULL;
- priv->tail_vma = NULL;
+ struct pagemapread pm;
+ int pagecount;
+ int ret = -ESRCH;
- /*
- * We remember last_addr rather than next_addr to hit with
- * mmap_cache most of the time. We have zero last_addr at
- * the beginning and also after lseek. We will have -1 last_addr
- * after the end of the vmas.
- */
+ if (!task)
+ goto out;
- if (last_addr == -1UL)
- return NULL;
+ ret = -EACCES;
+ if (!ptrace_may_attach(task))
+ goto out;
- priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
- if (!priv->task)
- return NULL;
+ ret = -EINVAL;
+ /* file position must be aligned */
+ if (*ppos % PM_ENTRY_BYTES)
+ goto out;
- mm = mm_for_maps(priv->task);
+ ret = 0;
+ mm = get_task_mm(task);
if (!mm)
- return NULL;
-
- priv->tail_vma = tail_vma = get_gate_vma(priv->task);
-
- /* Start with last addr hint */
- if (last_addr && (vma = find_vma(mm, last_addr))) {
- vma = vma->vm_next;
goto out;
- }
- /*
- * Check the vma index is within the range and do
- * sequential scan until m_index.
- */
- vma = NULL;
- if ((unsigned long)l < mm->map_count) {
- vma = mm->mmap;
- while (l-- && vma)
- vma = vma->vm_next;
- goto out;
- }
+ ret = -ENOMEM;
+ uaddr = (unsigned long)buf & PAGE_MASK;
+ uend = (unsigned long)(buf + count);
+ pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
+ pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_task;
- if (l != mm->map_count)
- tail_vma = NULL; /* After gate vma */
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(current, current->mm, uaddr, pagecount,
+ 1, 0, pages, NULL);
+ up_read(&current->mm->mmap_sem);
-out:
- if (vma)
- return vma;
+ if (ret < 0)
+ goto out_free;
- /* End of vmas has been reached */
- m->version = (tail_vma != NULL)? 0: -1UL;
- up_read(&mm->mmap_sem);
- mmput(mm);
- return tail_vma;
-}
+ pm.out = buf;
+ pm.end = buf + count;
-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
-{
- if (vma && vma != priv->tail_vma) {
- struct mm_struct *mm = vma->vm_mm;
- up_read(&mm->mmap_sem);
- mmput(mm);
+ if (!ptrace_may_attach(task)) {
+ ret = -EIO;
+ } else {
+ unsigned long src = *ppos;
+ unsigned long svpfn = src / PM_ENTRY_BYTES;
+ unsigned long start_vaddr = svpfn << PAGE_SHIFT;
+ unsigned long end_vaddr = TASK_SIZE_OF(task);
+
+ /* watch out for wraparound */
+ if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+ start_vaddr = end_vaddr;
+
+ /*
+ * The odds are that this will stop walking way
+ * before end_vaddr, because the length of the
+ * user buffer is tracked in "pm", and the walk
+ * will stop when we hit the end of the buffer.
+ */
+ ret = walk_page_range(mm, start_vaddr, end_vaddr,
+ &pagemap_walk, &pm);
+ if (ret == PM_END_OF_BUFFER)
+ ret = 0;
+ /* don't need mmap_sem for these, but this looks cleaner */
+ *ppos += pm.out - buf;
+ if (!ret)
+ ret = pm.out - buf;
}
-}
-
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- struct vm_area_struct *tail_vma = priv->tail_vma;
-
- (*pos)++;
- if (vma && (vma != tail_vma) && vma->vm_next)
- return vma->vm_next;
- vma_stop(priv, vma);
- return (vma != tail_vma)? tail_vma: NULL;
-}
-
-static void m_stop(struct seq_file *m, void *v)
-{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- vma_stop(priv, vma);
- if (priv->task)
- put_task_struct(priv->task);
-}
-
-static struct seq_operations proc_pid_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_map
-};
-
-static struct seq_operations proc_pid_smaps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_smap
-};
-
-static int do_maps_open(struct inode *inode, struct file *file,
- struct seq_operations *ops)
-{
- struct proc_maps_private *priv;
- int ret = -ENOMEM;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (priv) {
- priv->pid = proc_pid(inode);
- ret = seq_open(file, ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
- m->private = priv;
- } else {
- kfree(priv);
- }
+ for (; pagecount; pagecount--) {
+ page = pages[pagecount-1];
+ if (!PageReserved(page))
+ SetPageDirty(page);
+ page_cache_release(page);
}
+ mmput(mm);
+out_free:
+ kfree(pages);
+out_task:
+ put_task_struct(task);
+out:
return ret;
}
-static int maps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_pid_maps_op);
-}
-
-const struct file_operations proc_maps_operations = {
- .open = maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
+const struct file_operations proc_pagemap_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_read,
};
+#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
extern int show_numa_map(struct seq_file *m, void *v);
@@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = {
.release = seq_release_private,
};
#endif
-
-static int smaps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_pid_smaps_op);
-}
-
-const struct file_operations proc_smaps_operations = {
- .open = smaps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
diff --git a/fs/readdir.c b/fs/readdir.c
index efe52e676577..4e026e5407fb 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -30,7 +30,10 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf)
if (res)
goto out;
- mutex_lock(&inode->i_mutex);
+ res = mutex_lock_killable(&inode->i_mutex);
+ if (res)
+ goto out;
+
res = -ENOENT;
if (!IS_DEADDIR(inode)) {
res = file->f_op->readdir(file, buf, filler);
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 16b331dd9913..f491ceb5af02 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -272,7 +272,7 @@ static inline int block_group_used(struct super_block *s, u32 id)
/* If we don't have cached information on this bitmap block, we're
* going to have to load it later anyway. Loading it here allows us
- * to make a better decision. This favors long-term performace gain
+ * to make a better decision. This favors long-term performance gain
* with a better on-disk layout vs. a short term gain of skipping the
* read and potentially having a bad placement. */
if (info->free_count == UINT_MAX) {
@@ -663,7 +663,7 @@ static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
/*
* Relocation based on dirid, hashing them into a given bitmap block
- * files. Formatted nodes are unaffected, a seperate policy covers them
+ * files. Formatted nodes are unaffected, a separate policy covers them
*/
static void dirid_groups(reiserfs_blocknr_hint_t * hint)
{
@@ -688,7 +688,7 @@ static void dirid_groups(reiserfs_blocknr_hint_t * hint)
/*
* Relocation based on oid, hashing them into a given bitmap block
- * files. Formatted nodes are unaffected, a seperate policy covers them
+ * files. Formatted nodes are unaffected, a separate policy covers them
*/
static void oid_groups(reiserfs_blocknr_hint_t * hint)
{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 231fd5ccadc5..195309857e63 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2143,7 +2143,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
/* if we are not on a block boundary */
if (length) {
length = blocksize - length;
- zero_user_page(page, offset, length, KM_USER0);
+ zero_user(page, offset, length);
if (buffer_mapped(bh) && bh->b_blocknr != 0) {
mark_buffer_dirty(bh);
}
@@ -2367,7 +2367,7 @@ static int reiserfs_write_full_page(struct page *page,
unlock_page(page);
return 0;
}
- zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0);
+ zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
}
bh = head;
block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index fb7f7e8034df..2d3e107da2d3 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -66,7 +66,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);
/*
- * Unused memebers should be zero ...
+ * Unused members should be zero ...
*/
err = __clear_user(uinfo, sizeof(*uinfo));
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9416ead0c7aa..4e5c22ca802e 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -500,6 +500,13 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
struct smb_fattr root;
int ver;
void *mem;
+ static int warn_count;
+
+ if (warn_count < 5) {
+ warn_count++;
+ printk(KERN_EMERG "smbfs is deprecated and will be removed"
+ "from the 2.6.27 kernel. Please migrate to cifs\n");
+ }
if (!raw_data)
goto out_no_data;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index ca4b2d59c0ca..45f45933e862 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -105,7 +105,7 @@ struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize)
if (nfs_try_to_free_pages(server))
continue;
- if (signalled() && (server->flags & NFS_MOUNT_INTR))
+ if (fatal_signal_pending(current))
return ERR_PTR(-ERESTARTSYS);
current->policy = SCHED_YIELD;
schedule();
diff --git a/fs/splice.c b/fs/splice.c
index 1577a7391d23..4ee49e86edde 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1033,9 +1033,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
done:
pipe->nrbufs = pipe->curbuf = 0;
- if (bytes > 0)
- file_accessed(in);
-
+ file_accessed(in);
return bytes;
out_release:
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 61983f3b107c..10c80b59ec4b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -25,13 +25,15 @@ struct timerfd_ctx {
struct hrtimer tmr;
ktime_t tintv;
wait_queue_head_t wqh;
+ u64 ticks;
int expired;
+ int clockid;
};
/*
* This gets called when the timer event triggers. We set the "expired"
* flag, but we do not re-arm the timer (in case it's necessary,
- * tintv.tv64 != 0) until the timer is read.
+ * tintv.tv64 != 0) until the timer is accessed.
*/
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
{
@@ -40,13 +42,24 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
spin_lock_irqsave(&ctx->wqh.lock, flags);
ctx->expired = 1;
+ ctx->ticks++;
wake_up_locked(&ctx->wqh);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return HRTIMER_NORESTART;
}
-static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags,
+static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
+{
+ ktime_t now, remaining;
+
+ now = ctx->tmr.base->get_time();
+ remaining = ktime_sub(ctx->tmr.expires, now);
+
+ return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
+}
+
+static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
const struct itimerspec *ktmr)
{
enum hrtimer_mode htmode;
@@ -57,8 +70,9 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags,
texp = timespec_to_ktime(ktmr->it_value);
ctx->expired = 0;
+ ctx->ticks = 0;
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
- hrtimer_init(&ctx->tmr, clockid, htmode);
+ hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
ctx->tmr.expires = texp;
ctx->tmr.function = timerfd_tmrproc;
if (texp.tv64 != 0)
@@ -83,7 +97,7 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait)
poll_wait(file, &ctx->wqh, wait);
spin_lock_irqsave(&ctx->wqh.lock, flags);
- if (ctx->expired)
+ if (ctx->ticks)
events |= POLLIN;
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
@@ -102,11 +116,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
return -EINVAL;
spin_lock_irq(&ctx->wqh.lock);
res = -EAGAIN;
- if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) {
+ if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) {
__add_wait_queue(&ctx->wqh, &wait);
for (res = 0;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (ctx->expired) {
+ if (ctx->ticks) {
res = 0;
break;
}
@@ -121,22 +135,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
__remove_wait_queue(&ctx->wqh, &wait);
__set_current_state(TASK_RUNNING);
}
- if (ctx->expired) {
- ctx->expired = 0;
- if (ctx->tintv.tv64 != 0) {
+ if (ctx->ticks) {
+ ticks = ctx->ticks;
+ if (ctx->expired && ctx->tintv.tv64) {
/*
* If tintv.tv64 != 0, this is a periodic timer that
* needs to be re-armed. We avoid doing it in the timer
* callback to avoid DoS attacks specifying a very
* short timer period.
*/
- ticks = (u64)
- hrtimer_forward(&ctx->tmr,
- hrtimer_cb_get_time(&ctx->tmr),
- ctx->tintv);
+ ticks += hrtimer_forward_now(&ctx->tmr,
+ ctx->tintv) - 1;
hrtimer_restart(&ctx->tmr);
- } else
- ticks = 1;
+ }
+ ctx->expired = 0;
+ ctx->ticks = 0;
}
spin_unlock_irq(&ctx->wqh.lock);
if (ticks)
@@ -150,76 +163,132 @@ static const struct file_operations timerfd_fops = {
.read = timerfd_read,
};
-asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
- const struct itimerspec __user *utmr)
+static struct file *timerfd_fget(int fd)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-EBADF);
+ if (file->f_op != &timerfd_fops) {
+ fput(file);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return file;
+}
+
+asmlinkage long sys_timerfd_create(int clockid, int flags)
{
- int error;
+ int error, ufd;
struct timerfd_ctx *ctx;
struct file *file;
struct inode *inode;
- struct itimerspec ktmr;
-
- if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
- return -EFAULT;
+ if (flags)
+ return -EINVAL;
if (clockid != CLOCK_MONOTONIC &&
clockid != CLOCK_REALTIME)
return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ init_waitqueue_head(&ctx->wqh);
+ ctx->clockid = clockid;
+ hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+
+ error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
+ &timerfd_fops, ctx);
+ if (error) {
+ kfree(ctx);
+ return error;
+ }
+
+ return ufd;
+}
+
+asmlinkage long sys_timerfd_settime(int ufd, int flags,
+ const struct itimerspec __user *utmr,
+ struct itimerspec __user *otmr)
+{
+ struct file *file;
+ struct timerfd_ctx *ctx;
+ struct itimerspec ktmr, kotmr;
+
+ if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
+ return -EFAULT;
+
if (!timespec_valid(&ktmr.it_value) ||
!timespec_valid(&ktmr.it_interval))
return -EINVAL;
- if (ufd == -1) {
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- init_waitqueue_head(&ctx->wqh);
-
- timerfd_setup(ctx, clockid, flags, &ktmr);
-
- /*
- * When we call this, the initialization must be complete, since
- * anon_inode_getfd() will install the fd.
- */
- error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
- &timerfd_fops, ctx);
- if (error)
- goto err_tmrcancel;
- } else {
- file = fget(ufd);
- if (!file)
- return -EBADF;
- ctx = file->private_data;
- if (file->f_op != &timerfd_fops) {
- fput(file);
- return -EINVAL;
- }
- /*
- * We need to stop the existing timer before reprogramming
- * it to the new values.
- */
- for (;;) {
- spin_lock_irq(&ctx->wqh.lock);
- if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
- break;
- spin_unlock_irq(&ctx->wqh.lock);
- cpu_relax();
- }
- /*
- * Re-program the timer to the new value ...
- */
- timerfd_setup(ctx, clockid, flags, &ktmr);
+ file = timerfd_fget(ufd);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ ctx = file->private_data;
+ /*
+ * We need to stop the existing timer before reprogramming
+ * it to the new values.
+ */
+ for (;;) {
+ spin_lock_irq(&ctx->wqh.lock);
+ if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
+ break;
spin_unlock_irq(&ctx->wqh.lock);
- fput(file);
+ cpu_relax();
}
- return ufd;
+ /*
+ * If the timer is expired and it's periodic, we need to advance it
+ * because the caller may want to know the previous expiration time.
+ * We do not update "ticks" and "expired" since the timer will be
+ * re-programmed again in the following timerfd_setup() call.
+ */
+ if (ctx->expired && ctx->tintv.tv64)
+ hrtimer_forward_now(&ctx->tmr, ctx->tintv);
-err_tmrcancel:
- hrtimer_cancel(&ctx->tmr);
- kfree(ctx);
- return error;
+ kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
+ kotmr.it_interval = ktime_to_timespec(ctx->tintv);
+
+ /*
+ * Re-program the timer to the new value ...
+ */
+ timerfd_setup(ctx, flags, &ktmr);
+
+ spin_unlock_irq(&ctx->wqh.lock);
+ fput(file);
+ if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
+ return -EFAULT;
+
+ return 0;
+}
+
+asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr)
+{
+ struct file *file;
+ struct timerfd_ctx *ctx;
+ struct itimerspec kotmr;
+
+ file = timerfd_fget(ufd);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ ctx = file->private_data;
+
+ spin_lock_irq(&ctx->wqh.lock);
+ if (ctx->expired && ctx->tintv.tv64) {
+ ctx->expired = 0;
+ ctx->ticks +=
+ hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
+ hrtimer_restart(&ctx->tmr);
+ }
+ kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
+ kotmr.it_interval = ktime_to_timespec(ctx->tintv);
+ spin_unlock_irq(&ctx->wqh.lock);
+ fput(file);
+
+ return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
}
diff --git a/fs/xattr.c b/fs/xattr.c
index 6645b7313b33..f7c8f87bb390 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -105,6 +105,33 @@ out:
EXPORT_SYMBOL_GPL(vfs_setxattr);
ssize_t
+xattr_getsecurity(struct inode *inode, const char *name, void *value,
+ size_t size)
+{
+ void *buffer = NULL;
+ ssize_t len;
+
+ if (!value || !size) {
+ len = security_inode_getsecurity(inode, name, &buffer, false);
+ goto out_noalloc;
+ }
+
+ len = security_inode_getsecurity(inode, name, &buffer, true);
+ if (len < 0)
+ return len;
+ if (size < len) {
+ len = -ERANGE;
+ goto out;
+ }
+ memcpy(value, buffer, len);
+out:
+ security_release_secctx(buffer, len);
+out_noalloc:
+ return len;
+}
+EXPORT_SYMBOL_GPL(xattr_getsecurity);
+
+ssize_t
vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
{
struct inode *inode = dentry->d_inode;
@@ -118,23 +145,23 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
if (error)
return error;
- if (inode->i_op->getxattr)
- error = inode->i_op->getxattr(dentry, name, value, size);
- else
- error = -EOPNOTSUPP;
-
if (!strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN)) {
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
- int ret = security_inode_getsecurity(inode, suffix, value,
- size, error);
+ int ret = xattr_getsecurity(inode, suffix, value, size);
/*
* Only overwrite the return value if a security module
* is actually active.
*/
- if (ret != -EOPNOTSUPP)
- error = ret;
+ if (ret == -EOPNOTSUPP)
+ goto nolsm;
+ return ret;
}
+nolsm:
+ if (inode->i_op->getxattr)
+ error = inode->i_op->getxattr(dentry, name, value, size);
+ else
+ error = -EOPNOTSUPP;
return error;
}
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index ed2b16dff914..e040f1ce1b6a 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -92,8 +92,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
void
kmem_free(void *ptr, size_t size)
{
- if (((unsigned long)ptr < VMALLOC_START) ||
- ((unsigned long)ptr >= VMALLOC_END)) {
+ if (!is_vmalloc_addr(ptr)) {
kfree(ptr);
} else {
vfree(ptr);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index a49dd8d4b069..0382c19d6523 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -709,8 +709,7 @@ static inline struct page *
mem_to_page(
void *addr)
{
- if (((unsigned long)addr < VMALLOC_START) ||
- ((unsigned long)addr >= VMALLOC_END)) {
+ if ((!is_vmalloc_addr(addr))) {
return virt_to_page(addr);
} else {
return vmalloc_to_page(addr);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index d6a8dddb2268..6f614f35f650 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -155,7 +155,7 @@ xfs_iozero(
if (status)
break;
- zero_user_page(page, offset, bytes, KM_USER0);
+ zero_user(page, offset, bytes);
status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
page, fsdata);