diff options
Diffstat (limited to 'fs')
248 files changed, 6347 insertions, 4610 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c index e847f504a47c..1a6d08761f39 100644 --- a/fs/9p/9p.c +++ b/fs/9p/9p.c @@ -1,8 +1,9 @@ /* * linux/fs/9p/9p.c * - * This file contains functions 9P2000 functions + * This file contains functions to perform synchronous 9P calls * + * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -33,6 +34,7 @@ #include "debug.h" #include "v9fs.h" #include "9p.h" +#include "conv.h" #include "mux.h" /** @@ -46,16 +48,21 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **fcall) + char *version, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); - msg.id = TVERSION; - msg.params.tversion.msize = msize; - msg.params.tversion.version = version; + tc = v9fs_create_tversion(msize, version); - return v9fs_mux_rpc(v9ses, &msg, fcall); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **fcall) + u32 fid, u32 afid, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall* tc; dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, aname, fid, afid); - msg.id = TATTACH; - msg.params.tattach.fid = fid; - msg.params.tattach.afid = afid; - msg.params.tattach.uname = uname; - msg.params.tattach.aname = aname; - return v9fs_mux_rpc(v9ses, &msg, fcall); + tc = v9fs_create_tattach(fid, afid, uname, aname); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, + struct v9fs_fcall *rc, int err) +{ + int fid; + struct v9fs_session_info *v9ses; + + if (err) + return; + + fid = tc->params.tclunk.fid; + kfree(tc); + + if (!rc) + return; + + dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tc->id, rc->id); + v9ses = a; + if (rc->id == RCLUNK) + v9fs_put_idpool(fid, &v9ses->fidpool); + + kfree(rc); } /** @@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, */ int -v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **fcall) +v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc, *rc; dprintk(DEBUG_9P, "fid %d\n", fid); - msg.id = TCLUNK; - msg.params.tclunk.fid = fid; - return v9fs_mux_rpc(v9ses, &msg, fcall); + rc = NULL; + tc = v9fs_create_tclunk(fid); + if (!IS_ERR(tc)) + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + else + ret = PTR_ERR(tc); + + if (ret) + dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret); + + v9fs_t_clunk_cb(v9ses, tc, rc, ret); + return ret; } /** @@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, * */ -int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) +int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "oldtag %d\n", oldtag); + + tc = v9fs_create_tflush(oldtag); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, NULL); + kfree(tc); + } else + ret = PTR_ERR(tc); - dprintk(DEBUG_9P, "oldtag %d\n", tag); - msg.id = TFLUSH; - msg.params.tflush.oldtag = tag; - return v9fs_mux_rpc(v9ses, &msg, NULL); + return ret; } /** @@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) */ int -v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) +v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d\n", fid); - if (fcall) - *fcall = NULL; - msg.id = TSTAT; - msg.params.tstat.fid = fid; - return v9fs_mux_rpc(v9ses, &msg, fcall); + ret = -ENOMEM; + tc = v9fs_create_tstat(fid); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_stat *stat, struct v9fs_fcall **fcall) + struct v9fs_wstat *wstat, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d\n", fid); - dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length); - msg.id = TWSTAT; - msg.params.twstat.fid = fid; - msg.params.twstat.stat = stat; + tc = v9fs_create_twstat(fid, wstat, v9ses->extended); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); - return v9fs_mux_rpc(v9ses, &msg, fcall); + return ret; } /** @@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **fcall) + char *name, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; + int nwname; dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); - msg.id = TWALK; - msg.params.twalk.fid = fid; - msg.params.twalk.newfid = newfid; - - if (name) { - msg.params.twalk.nwname = 1; - msg.params.twalk.wnames = &name; - } else { - msg.params.twalk.nwname = 0; - } - - return v9fs_mux_rpc(v9ses, &msg, fcall); + + if (name) + nwname = 1; + else + nwname = 0; + + tc = v9fs_create_twalk(fid, newfid, nwname, &name); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **fcall) + struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; - long errorno = -1; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); - msg.id = TOPEN; - msg.params.topen.fid = fid; - msg.params.topen.mode = mode; - errorno = v9fs_mux_rpc(v9ses, &msg, fcall); + tc = v9fs_create_topen(fid, mode); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); - return errorno; + return ret; } /** @@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **fcall) + struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d\n", fid); - msg.id = TREMOVE; - msg.params.tremove.fid = fid; - return v9fs_mux_rpc(v9ses, &msg, fcall); + + tc = v9fs_create_tremove(fid); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, - u32 perm, u8 mode, struct v9fs_fcall **fcall) + u32 perm, u8 mode, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", fid, name, perm, mode); - msg.id = TCREATE; - msg.params.tcreate.fid = fid; - msg.params.tcreate.name = name; - msg.params.tcreate.perm = perm; - msg.params.tcreate.mode = mode; + tc = v9fs_create_tcreate(fid, name, perm, mode); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); - return v9fs_mux_rpc(v9ses, &msg, fcall); + return ret; } /** @@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, struct v9fs_fcall **fcall) + u32 count, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; - struct v9fs_fcall *rc = NULL; - long errorno = -1; - - dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid, - (long unsigned int)offset, count); - msg.id = TREAD; - msg.params.tread.fid = fid; - msg.params.tread.offset = offset; - msg.params.tread.count = count; - errorno = v9fs_mux_rpc(v9ses, &msg, &rc); - - if (!errorno) { - errorno = rc->params.rread.count; - dump_data(rc->params.rread.data, rc->params.rread.count); - } - - if (fcall) - *fcall = rc; - else - kfree(rc); - - return errorno; + int ret; + struct v9fs_fcall *tc, *rc; + + dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, + (long long unsigned) offset, count); + + tc = v9fs_create_tread(fid, offset, count); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + if (!ret) + ret = rc->params.rread.count; + if (rcp) + *rcp = rc; + else + kfree(rc); + + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, */ int -v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, - u64 offset, u32 count, void *data, struct v9fs_fcall **fcall) +v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, + const char __user *data, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; - struct v9fs_fcall *rc = NULL; - long errorno = -1; + int ret; + struct v9fs_fcall *tc, *rc; - dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid, - (unsigned long long)offset, count); - dump_data(data, count); + dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, + (long long unsigned) offset, count); - msg.id = TWRITE; - msg.params.twrite.fid = fid; - msg.params.twrite.offset = offset; - msg.params.twrite.count = count; - msg.params.twrite.data = data; + tc = v9fs_create_twrite(fid, offset, count, data); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - errorno = v9fs_mux_rpc(v9ses, &msg, &rc); + if (!ret) + ret = rc->params.rwrite.count; + if (rcp) + *rcp = rc; + else + kfree(rc); - if (!errorno) - errorno = rc->params.rwrite.count; + kfree(tc); + } else + ret = PTR_ERR(tc); - if (fcall) - *fcall = rc; - else - kfree(rc); - - return errorno; + return ret; } + diff --git a/fs/9p/9p.h b/fs/9p/9p.h index f55424216be2..0cd374d94717 100644 --- a/fs/9p/9p.h +++ b/fs/9p/9p.h @@ -3,6 +3,7 @@ * * 9P protocol definitions. * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -100,9 +101,18 @@ enum { V9FS_QTFILE = 0x00, }; +#define V9FS_NOTAG (u16)(~0) +#define V9FS_NOFID (u32)(~0) +#define V9FS_MAXWELEM 16 + /* ample room for Twrite/Rread header (iounit) */ #define V9FS_IOHDRSZ 24 +struct v9fs_str { + u16 len; + char *str; +}; + /* qids are the unique ID for a file (like an inode */ struct v9fs_qid { u8 type; @@ -120,6 +130,29 @@ struct v9fs_stat { u32 atime; u32 mtime; u64 length; + struct v9fs_str name; + struct v9fs_str uid; + struct v9fs_str gid; + struct v9fs_str muid; + struct v9fs_str extension; /* 9p2000.u extensions */ + u32 n_uid; /* 9p2000.u extensions */ + u32 n_gid; /* 9p2000.u extensions */ + u32 n_muid; /* 9p2000.u extensions */ +}; + +/* file metadata (stat) structure used to create Twstat message + The is similar to v9fs_stat, but the strings don't point to + the same memory block and should be freed separately +*/ +struct v9fs_wstat { + u16 size; + u16 type; + u32 dev; + struct v9fs_qid qid; + u32 mode; + u32 atime; + u32 mtime; + u64 length; char *name; char *uid; char *gid; @@ -128,25 +161,24 @@ struct v9fs_stat { u32 n_uid; /* 9p2000.u extensions */ u32 n_gid; /* 9p2000.u extensions */ u32 n_muid; /* 9p2000.u extensions */ - char data[0]; }; /* Structures for Protocol Operations */ struct Tversion { u32 msize; - char *version; + struct v9fs_str version; }; struct Rversion { u32 msize; - char *version; + struct v9fs_str version; }; struct Tauth { u32 afid; - char *uname; - char *aname; + struct v9fs_str uname; + struct v9fs_str aname; }; struct Rauth { @@ -154,12 +186,12 @@ struct Rauth { }; struct Rerror { - char *error; + struct v9fs_str error; u32 errno; /* 9p2000.u extension */ }; struct Tflush { - u32 oldtag; + u16 oldtag; }; struct Rflush { @@ -168,8 +200,8 @@ struct Rflush { struct Tattach { u32 fid; u32 afid; - char *uname; - char *aname; + struct v9fs_str uname; + struct v9fs_str aname; }; struct Rattach { @@ -179,13 +211,13 @@ struct Rattach { struct Twalk { u32 fid; u32 newfid; - u32 nwname; - char **wnames; + u16 nwname; + struct v9fs_str wnames[16]; }; struct Rwalk { - u32 nwqid; - struct v9fs_qid *wqids; + u16 nwqid; + struct v9fs_qid wqids[16]; }; struct Topen { @@ -200,7 +232,7 @@ struct Ropen { struct Tcreate { u32 fid; - char *name; + struct v9fs_str name; u32 perm; u8 mode; }; @@ -251,12 +283,12 @@ struct Tstat { }; struct Rstat { - struct v9fs_stat *stat; + struct v9fs_stat stat; }; struct Twstat { u32 fid; - struct v9fs_stat *stat; + struct v9fs_stat stat; }; struct Rwstat { @@ -271,6 +303,7 @@ struct v9fs_fcall { u32 size; u8 id; u16 tag; + void *sdata; union { struct Tversion tversion; @@ -303,7 +336,9 @@ struct v9fs_fcall { } params; }; -#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "") +#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \ + fcall?fcall->params.rerror.error.len:0, \ + fcall?fcall->params.rerror.error.str:""); int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, char *version, struct v9fs_fcall **rcall); @@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, u32 fid, u32 afid, struct v9fs_fcall **rcall); -int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcall); +int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid); int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); @@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcall); int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_stat *stat, struct v9fs_fcall **rcall); + struct v9fs_wstat *wstat, struct v9fs_fcall **rcall); int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, char *name, struct v9fs_fcall **rcall); @@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, struct v9fs_fcall **rcall); int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, void *data, struct v9fs_fcall **rcall); + u32 count, const char __user * data, + struct v9fs_fcall **rcall); diff --git a/fs/9p/Makefile b/fs/9p/Makefile index e4e4ffe5a7dc..3d023089707e 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -1,17 +1,17 @@ obj-$(CONFIG_9P_FS) := 9p2000.o 9p2000-objs := \ + trans_fd.o \ + trans_sock.o \ + mux.o \ + 9p.o \ + conv.o \ vfs_super.o \ vfs_inode.o \ vfs_file.o \ vfs_dir.o \ vfs_dentry.o \ error.o \ - mux.o \ - trans_fd.o \ - trans_sock.o \ - 9p.o \ - conv.o \ v9fs.o \ fid.o diff --git a/fs/9p/conv.c b/fs/9p/conv.c index 18121af99d3e..55ccfa10ee9e 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -30,7 +30,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/idr.h> - +#include <asm/uaccess.h> #include "debug.h" #include "v9fs.h" #include "9p.h" @@ -58,12 +58,15 @@ static inline int buf_check_overflow(struct cbuf *buf) static inline int buf_check_size(struct cbuf *buf, int len) { - if (buf->p+len > buf->ep) { + if (buf->p + len > buf->ep) { if (buf->p < buf->ep) { - eprintk(KERN_ERR, "buffer overflow\n"); + eprintk(KERN_ERR, "buffer overflow: want %d has %d\n", + len, (int)(buf->ep - buf->p)); + dump_stack(); buf->p = buf->ep + 1; - return 0; } + + return 0; } return 1; @@ -127,14 +130,6 @@ static inline void buf_put_string(struct cbuf *buf, const char *s) buf_put_stringn(buf, s, strlen(s)); } -static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) -{ - if (buf_check_size(buf, datalen)) { - memcpy(buf->p, data, datalen); - buf->p += datalen; - } -} - static inline u8 buf_get_int8(struct cbuf *buf) { u8 ret = 0; @@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf) return ret; } -static inline int -buf_get_string(struct cbuf *buf, char *data, unsigned int datalen) -{ - u16 len = 0; - - len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) { - memcpy(data, buf->p, len); - data[len] = 0; - buf->p += len; - len++; - } - - return len; -} - -static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) -{ - char *ret; - u16 len; - - ret = NULL; - len = buf_get_int16(buf); - - if (!buf_check_overflow(buf) && buf_check_size(buf, len) && - buf_check_size(sbuf, len+1)) { - - memcpy(sbuf->p, buf->p, len); - sbuf->p[len] = 0; - ret = sbuf->p; - buf->p += len; - sbuf->p += len + 1; - } - - return ret; -} - -static inline int buf_get_data(struct cbuf *buf, void *data, int datalen) +static inline void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr) { - int ret = 0; - - if (buf_check_size(buf, datalen)) { - memcpy(data, buf->p, datalen); - buf->p += datalen; - ret = datalen; + vstr->len = buf_get_int16(buf); + if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { + vstr->str = buf->p; + buf->p += vstr->len; + } else { + vstr->len = 0; + vstr->str = NULL; } - - return ret; } -static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, - int datalen) +static inline void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid) { - char *ret = NULL; - int n = 0; - - if (buf_check_size(dbuf, datalen)) { - n = buf_get_data(buf, dbuf->p, datalen); - if (n > 0) { - ret = dbuf->p; - dbuf->p += n; - } - } - - return ret; + qid->type = buf_get_int8(bufp); + qid->version = buf_get_int32(bufp); + qid->path = buf_get_int64(bufp); } /** - * v9fs_size_stat - calculate the size of a variable length stat struct - * @v9ses: session information + * v9fs_size_wstat - calculate the size of a variable length stat struct * @stat: metadata (stat) structure + * @extended: non-zero if 9P2000.u * */ -static int v9fs_size_stat(struct v9fs_session_info *v9ses, - struct v9fs_stat *stat) +static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended) { int size = 0; - if (stat == NULL) { + if (wstat == NULL) { eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); return 0; } @@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses, 8 + /* length[8] */ 8; /* minimum sum of string lengths */ - if (stat->name) - size += strlen(stat->name); - if (stat->uid) - size += strlen(stat->uid); - if (stat->gid) - size += strlen(stat->gid); - if (stat->muid) - size += strlen(stat->muid); + if (wstat->name) + size += strlen(wstat->name); + if (wstat->uid) + size += strlen(wstat->uid); + if (wstat->gid) + size += strlen(wstat->gid); + if (wstat->muid) + size += strlen(wstat->muid); - if (v9ses->extended) { + if (extended) { size += 4 + /* n_uid[4] */ 4 + /* n_gid[4] */ 4 + /* n_muid[4] */ 2; /* string length of extension[4] */ - if (stat->extension) - size += strlen(stat->extension); + if (wstat->extension) + size += strlen(wstat->extension); } return size; } /** - * serialize_stat - safely format a stat structure for transmission - * @v9ses: session info - * @stat: metadata (stat) structure - * @bufp: buffer to serialize structure into - * - */ - -static int -serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat, - struct cbuf *bufp) -{ - buf_put_int16(bufp, stat->size); - buf_put_int16(bufp, stat->type); - buf_put_int32(bufp, stat->dev); - buf_put_int8(bufp, stat->qid.type); - buf_put_int32(bufp, stat->qid.version); - buf_put_int64(bufp, stat->qid.path); - buf_put_int32(bufp, stat->mode); - buf_put_int32(bufp, stat->atime); - buf_put_int32(bufp, stat->mtime); - buf_put_int64(bufp, stat->length); - - buf_put_string(bufp, stat->name); - buf_put_string(bufp, stat->uid); - buf_put_string(bufp, stat->gid); - buf_put_string(bufp, stat->muid); - - if (v9ses->extended) { - buf_put_string(bufp, stat->extension); - buf_put_int32(bufp, stat->n_uid); - buf_put_int32(bufp, stat->n_gid); - buf_put_int32(bufp, stat->n_muid); - } - - if (buf_check_overflow(bufp)) - return 0; - - return stat->size; -} - -/** - * deserialize_stat - safely decode a recieved metadata (stat) structure - * @v9ses: session info + * buf_get_stat - safely decode a recieved metadata (stat) structure * @bufp: buffer to deserialize * @stat: metadata (stat) structure - * @dbufp: buffer to deserialize variable strings into + * @extended: non-zero if 9P2000.u * */ -static inline int -deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, - struct v9fs_stat *stat, struct cbuf *dbufp) +static inline void +buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended) { - stat->size = buf_get_int16(bufp); stat->type = buf_get_int16(bufp); stat->dev = buf_get_int32(bufp); @@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, stat->atime = buf_get_int32(bufp); stat->mtime = buf_get_int32(bufp); stat->length = buf_get_int64(bufp); - stat->name = buf_get_stringb(bufp, dbufp); - stat->uid = buf_get_stringb(bufp, dbufp); - stat->gid = buf_get_stringb(bufp, dbufp); - stat->muid = buf_get_stringb(bufp, dbufp); + buf_get_str(bufp, &stat->name); + buf_get_str(bufp, &stat->uid); + buf_get_str(bufp, &stat->gid); + buf_get_str(bufp, &stat->muid); - if (v9ses->extended) { - stat->extension = buf_get_stringb(bufp, dbufp); + if (extended) { + buf_get_str(bufp, &stat->extension); stat->n_uid = buf_get_int32(bufp); stat->n_gid = buf_get_int32(bufp); stat->n_muid = buf_get_int32(bufp); } - - if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) - return 0; - - return stat->size + 2; -} - -/** - * deserialize_statb - wrapper for decoding a received metadata structure - * @v9ses: session info - * @bufp: buffer to deserialize - * @dbufp: buffer to deserialize variable strings into - * - */ - -static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info - *v9ses, struct cbuf *bufp, - struct cbuf *dbufp) -{ - struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat)); - - if (ret) { - int n = deserialize_stat(v9ses, bufp, ret, dbufp); - if (n <= 0) - return NULL; - } - - return ret; } /** * v9fs_deserialize_stat - decode a received metadata structure - * @v9ses: session info * @buf: buffer to deserialize * @buflen: length of received buffer * @stat: metadata structure to decode into - * @statlen: length of destination metadata structure + * @extended: non-zero if 9P2000.u * + * Note: stat will point to the buf region. */ int -v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf, - u32 buflen, struct v9fs_stat *stat, u32 statlen) +v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, + int extended) { struct cbuf buffer; struct cbuf *bufp = &buffer; - struct cbuf dbuffer; - struct cbuf *dbufp = &dbuffer; + unsigned char *p; buf_init(bufp, buf, buflen); - buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat), - statlen - sizeof(struct v9fs_stat)); - - return deserialize_stat(v9ses, bufp, stat, dbufp); -} - -static inline int -v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall) -{ - int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */ - int i = 0; - - switch (fcall->id) { - default: - eprintk(KERN_ERR, "bad msg type %d\n", fcall->id); - return 0; - case TVERSION: /* msize[4] version[s] */ - size += 4 + 2 + strlen(fcall->params.tversion.version); - break; - case TAUTH: /* afid[4] uname[s] aname[s] */ - size += 4 + 2 + strlen(fcall->params.tauth.uname) + - 2 + strlen(fcall->params.tauth.aname); - break; - case TFLUSH: /* oldtag[2] */ - size += 2; - break; - case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */ - size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) + - 2 + strlen(fcall->params.tattach.aname); - break; - case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */ - size += 4 + 4 + 2; - /* now compute total for the array of names */ - for (i = 0; i < fcall->params.twalk.nwname; i++) - size += 2 + strlen(fcall->params.twalk.wnames[i]); - break; - case TOPEN: /* fid[4] mode[1] */ - size += 4 + 1; - break; - case TCREATE: /* fid[4] name[s] perm[4] mode[1] */ - size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1; - break; - case TREAD: /* fid[4] offset[8] count[4] */ - size += 4 + 8 + 4; - break; - case TWRITE: /* fid[4] offset[8] count[4] data[count] */ - size += 4 + 8 + 4 + fcall->params.twrite.count; - break; - case TCLUNK: /* fid[4] */ - size += 4; - break; - case TREMOVE: /* fid[4] */ - size += 4; - break; - case TSTAT: /* fid[4] */ - size += 4; - break; - case TWSTAT: /* fid[4] stat[n] */ - fcall->params.twstat.stat->size = - v9fs_size_stat(v9ses, fcall->params.twstat.stat); - size += 4 + 2 + 2 + fcall->params.twstat.stat->size; - } - return size; -} - -/* - * v9fs_serialize_fcall - marshall fcall struct into a packet - * @v9ses: session information - * @fcall: structure to convert - * @data: buffer to serialize fcall into - * @datalen: length of buffer to serialize fcall into - * - */ - -int -v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall, - void *data, u32 datalen) -{ - int i = 0; - struct v9fs_stat *stat = NULL; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - buf_init(bufp, data, datalen); - - if (!fcall) { - eprintk(KERN_ERR, "no fcall\n"); - return -EINVAL; - } - - fcall->size = v9fs_size_fcall(v9ses, fcall); - - buf_put_int32(bufp, fcall->size); - buf_put_int8(bufp, fcall->id); - buf_put_int16(bufp, fcall->tag); - - dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id, - fcall->tag); - - /* now encode it */ - switch (fcall->id) { - default: - eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id); - return -EPROTO; - case TVERSION: - buf_put_int32(bufp, fcall->params.tversion.msize); - buf_put_string(bufp, fcall->params.tversion.version); - break; - case TAUTH: - buf_put_int32(bufp, fcall->params.tauth.afid); - buf_put_string(bufp, fcall->params.tauth.uname); - buf_put_string(bufp, fcall->params.tauth.aname); - break; - case TFLUSH: - buf_put_int16(bufp, fcall->params.tflush.oldtag); - break; - case TATTACH: - buf_put_int32(bufp, fcall->params.tattach.fid); - buf_put_int32(bufp, fcall->params.tattach.afid); - buf_put_string(bufp, fcall->params.tattach.uname); - buf_put_string(bufp, fcall->params.tattach.aname); - break; - case TWALK: - buf_put_int32(bufp, fcall->params.twalk.fid); - buf_put_int32(bufp, fcall->params.twalk.newfid); - buf_put_int16(bufp, fcall->params.twalk.nwname); - for (i = 0; i < fcall->params.twalk.nwname; i++) - buf_put_string(bufp, fcall->params.twalk.wnames[i]); - break; - case TOPEN: - buf_put_int32(bufp, fcall->params.topen.fid); - buf_put_int8(bufp, fcall->params.topen.mode); - break; - case TCREATE: - buf_put_int32(bufp, fcall->params.tcreate.fid); - buf_put_string(bufp, fcall->params.tcreate.name); - buf_put_int32(bufp, fcall->params.tcreate.perm); - buf_put_int8(bufp, fcall->params.tcreate.mode); - break; - case TREAD: - buf_put_int32(bufp, fcall->params.tread.fid); - buf_put_int64(bufp, fcall->params.tread.offset); - buf_put_int32(bufp, fcall->params.tread.count); - break; - case TWRITE: - buf_put_int32(bufp, fcall->params.twrite.fid); - buf_put_int64(bufp, fcall->params.twrite.offset); - buf_put_int32(bufp, fcall->params.twrite.count); - buf_put_data(bufp, fcall->params.twrite.data, - fcall->params.twrite.count); - break; - case TCLUNK: - buf_put_int32(bufp, fcall->params.tclunk.fid); - break; - case TREMOVE: - buf_put_int32(bufp, fcall->params.tremove.fid); - break; - case TSTAT: - buf_put_int32(bufp, fcall->params.tstat.fid); - break; - case TWSTAT: - buf_put_int32(bufp, fcall->params.twstat.fid); - stat = fcall->params.twstat.stat; - - buf_put_int16(bufp, stat->size + 2); - serialize_stat(v9ses, stat, bufp); - break; - } + p = bufp->p; + buf_get_stat(bufp, stat, extended); if (buf_check_overflow(bufp)) - return -EIO; - - return fcall->size; + return 0; + else + return bufp->p - p; } /** * deserialize_fcall - unmarshal a response - * @v9ses: session information - * @msgsize: size of rcall message * @buf: recieved buffer * @buflen: length of received buffer * @rcall: fcall structure to populate * @rcalllen: length of fcall structure to populate + * @extended: non-zero if 9P2000.u * */ int -v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, - void *buf, u32 buflen, struct v9fs_fcall *rcall, - int rcalllen) +v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, + int extended) { struct cbuf buffer; struct cbuf *bufp = &buffer; - struct cbuf dbuffer; - struct cbuf *dbufp = &dbuffer; int i = 0; buf_init(bufp, buf, buflen); - buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall), - rcalllen - sizeof(struct v9fs_fcall)); - rcall->size = msgsize; + rcall->size = buf_get_int32(bufp); rcall->id = buf_get_int8(bufp); rcall->tag = buf_get_int16(bufp); dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, rcall->tag); + switch (rcall->id) { default: eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); return -EPROTO; case RVERSION: rcall->params.rversion.msize = buf_get_int32(bufp); - rcall->params.rversion.version = buf_get_stringb(bufp, dbufp); + buf_get_str(bufp, &rcall->params.rversion.version); break; case RFLUSH: break; @@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, break; case RWALK: rcall->params.rwalk.nwqid = buf_get_int16(bufp); - rcall->params.rwalk.wqids = buf_alloc(dbufp, - rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); - if (rcall->params.rwalk.wqids) - for (i = 0; i < rcall->params.rwalk.nwqid; i++) { - rcall->params.rwalk.wqids[i].type = - buf_get_int8(bufp); - rcall->params.rwalk.wqids[i].version = - buf_get_int16(bufp); - rcall->params.rwalk.wqids[i].path = - buf_get_int64(bufp); - } + if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) { + eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n", + V9FS_MAXWELEM, rcall->params.rwalk.nwqid); + return -EPROTO; + } + + for (i = 0; i < rcall->params.rwalk.nwqid; i++) + buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); break; case ROPEN: - rcall->params.ropen.qid.type = buf_get_int8(bufp); - rcall->params.ropen.qid.version = buf_get_int32(bufp); - rcall->params.ropen.qid.path = buf_get_int64(bufp); + buf_get_qid(bufp, &rcall->params.ropen.qid); rcall->params.ropen.iounit = buf_get_int32(bufp); break; case RCREATE: - rcall->params.rcreate.qid.type = buf_get_int8(bufp); - rcall->params.rcreate.qid.version = buf_get_int32(bufp); - rcall->params.rcreate.qid.path = buf_get_int64(bufp); + buf_get_qid(bufp, &rcall->params.rcreate.qid); rcall->params.rcreate.iounit = buf_get_int32(bufp); break; case RREAD: rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = buf_get_datab(bufp, dbufp, - rcall->params.rread.count); + rcall->params.rread.data = bufp->p; + buf_check_size(bufp, rcall->params.rread.count); break; case RWRITE: rcall->params.rwrite.count = buf_get_int32(bufp); @@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, break; case RSTAT: buf_get_int16(bufp); - rcall->params.rstat.stat = - deserialize_statb(v9ses, bufp, dbufp); + buf_get_stat(bufp, &rcall->params.rstat.stat, extended); break; case RWSTAT: break; case RERROR: - rcall->params.rerror.error = buf_get_stringb(bufp, dbufp); - if (v9ses->extended) + buf_get_str(bufp, &rcall->params.rerror.error); + if (extended) rcall->params.rerror.errno = buf_get_int16(bufp); break; } - if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) + if (buf_check_overflow(bufp)) { + dprintk(DEBUG_ERROR, "buffer overflow\n"); return -EIO; + } + + return bufp->p - bufp->sp; +} + +static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p) +{ + *p = val; + buf_put_int8(bufp, val); +} + +static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p) +{ + *p = val; + buf_put_int16(bufp, val); +} + +static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p) +{ + *p = val; + buf_put_int32(bufp, val); +} + +static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p) +{ + *p = val; + buf_put_int64(bufp, val); +} - return rcall->size; +static inline void +v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str) +{ + if (data) { + str->len = strlen(data); + str->str = bufp->p; + } else { + str->len = 0; + str->str = NULL; + } + + buf_put_stringn(bufp, data, str->len); +} + +static inline int +v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count, + unsigned char **pdata) +{ + *pdata = buf_alloc(bufp, count); + return copy_from_user(*pdata, data, count); +} + +static void +v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat, + struct v9fs_stat *stat, int statsz, int extended) +{ + v9fs_put_int16(bufp, statsz, &stat->size); + v9fs_put_int16(bufp, wstat->type, &stat->type); + v9fs_put_int32(bufp, wstat->dev, &stat->dev); + v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type); + v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version); + v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path); + v9fs_put_int32(bufp, wstat->mode, &stat->mode); + v9fs_put_int32(bufp, wstat->atime, &stat->atime); + v9fs_put_int32(bufp, wstat->mtime, &stat->mtime); + v9fs_put_int64(bufp, wstat->length, &stat->length); + + v9fs_put_str(bufp, wstat->name, &stat->name); + v9fs_put_str(bufp, wstat->uid, &stat->uid); + v9fs_put_str(bufp, wstat->gid, &stat->gid); + v9fs_put_str(bufp, wstat->muid, &stat->muid); + + if (extended) { + v9fs_put_str(bufp, wstat->extension, &stat->extension); + v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid); + v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid); + v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid); + } +} + +static struct v9fs_fcall * +v9fs_create_common(struct cbuf *bufp, u32 size, u8 id) +{ + struct v9fs_fcall *fc; + + size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ + fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->sdata = (char *)fc + sizeof(*fc); + + buf_init(bufp, (char *)fc->sdata, size); + v9fs_put_int32(bufp, size, &fc->size); + v9fs_put_int8(bufp, id, &fc->id); + v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag); + + return fc; +} + +void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag) +{ + fc->tag = tag; + *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); +} + +struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(version); /* msize[4] version[s] */ + fc = v9fs_create_common(bufp, size, TVERSION); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, msize, &fc->params.tversion.msize); + v9fs_put_str(bufp, version, &fc->params.tversion.version); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */ + fc = v9fs_create_common(bufp, size, TAUTH); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, afid, &fc->params.tauth.afid); + v9fs_put_str(bufp, uname, &fc->params.tauth.uname); + v9fs_put_str(bufp, aname, &fc->params.tauth.aname); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall * +v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */ + fc = v9fs_create_common(bufp, size, TATTACH); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tattach.fid); + v9fs_put_int32(bufp, afid, &fc->params.tattach.afid); + v9fs_put_str(bufp, uname, &fc->params.tattach.uname); + v9fs_put_str(bufp, aname, &fc->params.tattach.aname); + + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tflush(u16 oldtag) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 2; /* oldtag[2] */ + fc = v9fs_create_common(bufp, size, TFLUSH); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, + char **wnames) +{ + int i, size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + if (nwname > V9FS_MAXWELEM) { + dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM); + return NULL; + } + + size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ + for (i = 0; i < nwname; i++) { + size += 2 + strlen(wnames[i]); /* wname[s] */ + } + + fc = v9fs_create_common(bufp, size, TWALK); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.twalk.fid); + v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid); + v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname); + for (i = 0; i < nwname; i++) { + v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 1; /* fid[4] mode[1] */ + fc = v9fs_create_common(bufp, size, TOPEN); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.topen.fid); + v9fs_put_int8(bufp, mode, &fc->params.topen.mode); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ + fc = v9fs_create_common(bufp, size, TCREATE); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid); + v9fs_put_str(bufp, name, &fc->params.tcreate.name); + v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm); + v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ + fc = v9fs_create_common(bufp, size, TREAD); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tread.fid); + v9fs_put_int64(bufp, offset, &fc->params.tread.offset); + v9fs_put_int32(bufp, count, &fc->params.tread.count); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, + const char __user * data) +{ + int size, err; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */ + fc = v9fs_create_common(bufp, size, TWRITE); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.twrite.fid); + v9fs_put_int64(bufp, offset, &fc->params.twrite.offset); + v9fs_put_int32(bufp, count, &fc->params.twrite.count); + err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data); + if (err) { + kfree(fc); + fc = ERR_PTR(err); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tclunk(u32 fid) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = v9fs_create_common(bufp, size, TCLUNK); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tremove(u32 fid) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = v9fs_create_common(bufp, size, TREMOVE); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tremove.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tstat(u32 fid) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = v9fs_create_common(bufp, size, TSTAT); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tstat.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, + int extended) +{ + int size, statsz; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + statsz = v9fs_size_wstat(wstat, extended); + size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ + fc = v9fs_create_common(bufp, size, TWSTAT); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.twstat.fid); + buf_put_int16(bufp, statsz + 2); + v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; } diff --git a/fs/9p/conv.h b/fs/9p/conv.h index ee849613c61a..26a736e4a2e7 100644 --- a/fs/9p/conv.h +++ b/fs/9p/conv.h @@ -1,8 +1,9 @@ /* * linux/fs/9p/conv.h * - * 9P protocol conversion definitions + * 9P protocol conversion definitions. * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -24,13 +25,27 @@ * */ -int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf, - u32 buflen, struct v9fs_stat *stat, u32 statlen); -int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall, - void *buf, u32 buflen); -int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen, - void *buf, u32 buflen, struct v9fs_fcall *rcall, - int rcalllen); +int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, + int extended); +int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, + int extended); -/* this one is actually in error.c right now */ -int v9fs_errstr2errno(char *errstr); +void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag); + +struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version); +struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname); +struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname, + char *aname); +struct v9fs_fcall *v9fs_create_tflush(u16 oldtag); +struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, + char **wnames); +struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode); +struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode); +struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count); +struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, + const char __user *data); +struct v9fs_fcall *v9fs_create_tclunk(u32 fid); +struct v9fs_fcall *v9fs_create_tremove(u32 fid); +struct v9fs_fcall *v9fs_create_tstat(u32 fid); +struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, + int extended); diff --git a/fs/9p/debug.h b/fs/9p/debug.h index 4445f06919d9..fe551032788b 100644 --- a/fs/9p/debug.h +++ b/fs/9p/debug.h @@ -51,16 +51,23 @@ do { \ #if DEBUG_DUMP_PKT static inline void dump_data(const unsigned char *data, unsigned int datalen) { - int i, j; - int len = datalen; + int i, n; + char buf[5*8]; - printk(KERN_DEBUG "data "); - for (i = 0; i < len; i += 4) { - for (j = 0; (j < 4) && (i + j < len); j++) - printk(KERN_DEBUG "%02x", data[i + j]); - printk(KERN_DEBUG " "); + n = 0; + i = 0; + while (i < datalen) { + n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]); + if (i%4 == 0) + n += snprintf(buf+n, sizeof(buf)-n, " "); + + if (i%16 == 0) { + dprintk(DEBUG_ERROR, "%s\n", buf); + n = 0; + } } - printk(KERN_DEBUG "\n"); + + dprintk(DEBUG_ERROR, "%s\n", buf); } #else /* DEBUG_DUMP_PKT */ static inline void dump_data(const unsigned char *data, unsigned int datalen) diff --git a/fs/9p/error.c b/fs/9p/error.c index 834cb179e388..e4b6f8f38b6f 100644 --- a/fs/9p/error.c +++ b/fs/9p/error.c @@ -33,7 +33,6 @@ #include <linux/list.h> #include <linux/jhash.h> -#include <linux/string.h> #include "debug.h" #include "error.h" @@ -55,7 +54,8 @@ int v9fs_error_init(void) /* load initial error map into hash table */ for (c = errmap; c->name != NULL; c++) { - bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ; + c->namelen = strlen(c->name); + bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; INIT_HLIST_NODE(&c->list); hlist_add_head(&c->list, &hash_errmap[bucket]); } @@ -69,15 +69,15 @@ int v9fs_error_init(void) * */ -int v9fs_errstr2errno(char *errstr) +int v9fs_errstr2errno(char *errstr, int len) { int errno = 0; struct hlist_node *p = NULL; struct errormap *c = NULL; - int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ; + int bucket = jhash(errstr, len, 0) % ERRHASHSZ; hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { - if (!strcmp(c->name, errstr)) { + if (c->namelen==len && !memcmp(c->name, errstr, len)) { errno = c->val; break; } diff --git a/fs/9p/error.h b/fs/9p/error.h index 78f89acf7c9a..a9794e85fe51 100644 --- a/fs/9p/error.h +++ b/fs/9p/error.h @@ -36,6 +36,7 @@ struct errormap { char *name; int val; + int namelen; struct hlist_node list; }; @@ -175,4 +176,3 @@ static struct errormap errmap[] = { }; extern int v9fs_error_init(void); -extern int v9fs_errstr2errno(char *errstr); diff --git a/fs/9p/fid.c b/fs/9p/fid.c index d95f8626d170..eda449778fa5 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -31,9 +31,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "transport.h" -#include "mux.h" -#include "conv.h" #include "fid.h" /** @@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry) return v9fs_fid_create(dentry, v9ses, fidnum, 0); clunk_fid: - v9fs_t_clunk(v9ses, fidnum, NULL); + v9fs_t_clunk(v9ses, fidnum); return ERR_PTR(err); } diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 8835b576f744..945cb368d451 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -4,7 +4,7 @@ * Protocol Multiplexer * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,448 +28,943 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/poll.h> #include <linux/kthread.h> #include <linux/idr.h> #include "debug.h" #include "v9fs.h" #include "9p.h" -#include "transport.h" #include "conv.h" +#include "transport.h" #include "mux.h" +#define ERREQFLUSH 1 +#define SCHED_TIMEOUT 10 +#define MAXPOLLWADDR 2 + +enum { + Rworksched = 1, /* read work scheduled or running */ + Rpending = 2, /* can read */ + Wworksched = 4, /* write work scheduled or running */ + Wpending = 8, /* can write */ +}; + +struct v9fs_mux_poll_task; + +struct v9fs_req { + int tag; + struct v9fs_fcall *tcall; + struct v9fs_fcall *rcall; + int err; + v9fs_mux_req_callback cb; + void *cba; + struct list_head req_list; +}; + +struct v9fs_mux_data { + spinlock_t lock; + struct list_head mux_list; + struct v9fs_mux_poll_task *poll_task; + int msize; + unsigned char *extended; + struct v9fs_transport *trans; + struct v9fs_idpool tidpool; + int err; + wait_queue_head_t equeue; + struct list_head req_list; + struct list_head unsent_req_list; + struct v9fs_fcall *rcall; + int rpos; + char *rbuf; + int wpos; + int wsize; + char *wbuf; + wait_queue_t poll_wait[MAXPOLLWADDR]; + wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; + poll_table pt; + struct work_struct rq; + struct work_struct wq; + unsigned long wsched; +}; + +struct v9fs_mux_poll_task { + struct task_struct *task; + struct list_head mux_list; + int muxnum; +}; + +struct v9fs_mux_rpc { + struct v9fs_mux_data *m; + struct v9fs_req *req; + int err; + struct v9fs_fcall *rcall; + wait_queue_head_t wqueue; +}; + +static int v9fs_poll_proc(void *); +static void v9fs_read_work(void *); +static void v9fs_write_work(void *); +static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, + poll_table * p); +static u16 v9fs_mux_get_tag(struct v9fs_mux_data *); +static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16); + +static DECLARE_MUTEX(v9fs_mux_task_lock); +static struct workqueue_struct *v9fs_mux_wq; + +static int v9fs_mux_num; +static int v9fs_mux_poll_task_num; +static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100]; + +int v9fs_mux_global_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) + v9fs_mux_poll_tasks[i].task = NULL; + + v9fs_mux_wq = create_workqueue("v9fs"); + if (!v9fs_mux_wq) + return -ENOMEM; + + return 0; +} + +void v9fs_mux_global_exit(void) +{ + destroy_workqueue(v9fs_mux_wq); +} + /** - * dprintcond - print condition of session info - * @v9ses: session info structure - * @req: RPC request structure + * v9fs_mux_calc_poll_procs - calculates the number of polling procs + * based on the number of mounted v9fs filesystems. * + * The current implementation returns sqrt of the number of mounts. */ +inline int v9fs_mux_calc_poll_procs(int muxnum) +{ + int n; + + if (v9fs_mux_poll_task_num) + n = muxnum / v9fs_mux_poll_task_num + + (muxnum % v9fs_mux_poll_task_num ? 1 : 0); + else + n = 1; + + if (n > ARRAY_SIZE(v9fs_mux_poll_tasks)) + n = ARRAY_SIZE(v9fs_mux_poll_tasks); + + return n; +} -static inline int -dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +static int v9fs_mux_poll_start(struct v9fs_mux_data *m) { - dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status, - req->rcall); + int i, n; + struct v9fs_mux_poll_task *vpt, *vptlast; + struct task_struct *pproc; + + dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num, + v9fs_mux_poll_task_num); + up(&v9fs_mux_task_lock); + + n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1); + if (n > v9fs_mux_poll_task_num) { + for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { + if (v9fs_mux_poll_tasks[i].task == NULL) { + vpt = &v9fs_mux_poll_tasks[i]; + dprintk(DEBUG_MUX, "create proc %p\n", vpt); + pproc = kthread_create(v9fs_poll_proc, vpt, + "v9fs-poll"); + + if (!IS_ERR(pproc)) { + vpt->task = pproc; + INIT_LIST_HEAD(&vpt->mux_list); + vpt->muxnum = 0; + v9fs_mux_poll_task_num++; + wake_up_process(vpt->task); + } + break; + } + } + + if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) + dprintk(DEBUG_ERROR, "warning: no free poll slots\n"); + } + + n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num + + ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0); + + vptlast = NULL; + for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { + vpt = &v9fs_mux_poll_tasks[i]; + if (vpt->task != NULL) { + vptlast = vpt; + if (vpt->muxnum < n) { + dprintk(DEBUG_MUX, "put in proc %d\n", i); + list_add(&m->mux_list, &vpt->mux_list); + vpt->muxnum++; + m->poll_task = vpt; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + init_poll_funcptr(&m->pt, v9fs_pollwait); + break; + } + } + } + + if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) { + if (vptlast == NULL) + return -ENOMEM; + + dprintk(DEBUG_MUX, "put in proc %d\n", i); + list_add(&m->mux_list, &vptlast->mux_list); + vptlast->muxnum++; + m->poll_task = vptlast; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + init_poll_funcptr(&m->pt, v9fs_pollwait); + } + + v9fs_mux_num++; + down(&v9fs_mux_task_lock); + return 0; } +static void v9fs_mux_poll_stop(struct v9fs_mux_data *m) +{ + int i; + struct v9fs_mux_poll_task *vpt; + + up(&v9fs_mux_task_lock); + vpt = m->poll_task; + list_del(&m->mux_list); + for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { + if (m->poll_waddr[i] != NULL) { + remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); + m->poll_waddr[i] = NULL; + } + } + vpt->muxnum--; + if (!vpt->muxnum) { + dprintk(DEBUG_MUX, "destroy proc %p\n", vpt); + send_sig(SIGKILL, vpt->task, 1); + vpt->task = NULL; + v9fs_mux_poll_task_num--; + } + v9fs_mux_num--; + down(&v9fs_mux_task_lock); +} + /** - * xread - force read of a certain number of bytes - * @v9ses: session info structure - * @ptr: pointer to buffer - * @sz: number of bytes to read + * v9fs_mux_init - allocate and initialize the per-session mux data + * Creates the polling task if this is the first session. * - * Chuck Cranor CS-533 project1 + * @trans - transport structure + * @msize - maximum message size + * @extended - pointer to the extended flag */ - -static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz) +struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, + unsigned char *extended) { - int rd = 0; - int ret = 0; - while (rd < sz) { - ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd); - if (ret <= 0) { - dprintk(DEBUG_ERROR, "xread errno %d\n", ret); - return ret; + int i, n; + struct v9fs_mux_data *m, *mtmp; + + dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize); + m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&m->lock); + INIT_LIST_HEAD(&m->mux_list); + m->msize = msize; + m->extended = extended; + m->trans = trans; + idr_init(&m->tidpool.pool); + init_MUTEX(&m->tidpool.lock); + m->err = 0; + init_waitqueue_head(&m->equeue); + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + m->rcall = NULL; + m->rpos = 0; + m->rbuf = NULL; + m->wpos = m->wsize = 0; + m->wbuf = NULL; + INIT_WORK(&m->rq, v9fs_read_work, m); + INIT_WORK(&m->wq, v9fs_write_work, m); + m->wsched = 0; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + m->poll_task = NULL; + n = v9fs_mux_poll_start(m); + if (n) + return ERR_PTR(n); + + n = trans->poll(trans, &m->pt); + if (n & POLLIN) { + dprintk(DEBUG_MUX, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); + } + + if (n & POLLOUT) { + dprintk(DEBUG_MUX, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } + + for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { + if (IS_ERR(m->poll_waddr[i])) { + v9fs_mux_poll_stop(m); + mtmp = (void *)m->poll_waddr; /* the error code */ + kfree(m); + m = mtmp; + break; } - rd += ret; - ptr += ret; } - return (rd); + + return m; } /** - * read_message - read a full 9P2000 fcall packet - * @v9ses: session info structure - * @rcall: fcall structure to read into - * @rcalllen: size of fcall buffer - * + * v9fs_mux_destroy - cancels all pending requests and frees mux resources */ +void v9fs_mux_destroy(struct v9fs_mux_data *m) +{ + dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m, + m->mux_list.prev, m->mux_list.next); + v9fs_mux_cancel(m, -ECONNRESET); + + if (!list_empty(&m->req_list)) { + /* wait until all processes waiting on this session exit */ + dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n", + m); + wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); + dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m, + list_empty(&m->req_list)); + } + + v9fs_mux_poll_stop(m); + m->trans = NULL; + + kfree(m); +} -static int -read_message(struct v9fs_session_info *v9ses, - struct v9fs_fcall *rcall, int rcalllen) +/** + * v9fs_pollwait - called by files poll operation to add v9fs-poll task + * to files wait queue + */ +static void +v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, + poll_table * p) { - unsigned char buf[4]; - void *data; - int size = 0; - int res = 0; - - res = xread(v9ses, buf, sizeof(buf)); - if (res < 0) { - dprintk(DEBUG_ERROR, - "Reading of count field failed returned: %d\n", res); - return res; + int i; + struct v9fs_mux_data *m; + + m = container_of(p, struct v9fs_mux_data, pt); + for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) + if (m->poll_waddr[i] == NULL) + break; + + if (i >= ARRAY_SIZE(m->poll_waddr)) { + dprintk(DEBUG_ERROR, "not enough wait_address slots\n"); + return; } - if (res < 4) { - dprintk(DEBUG_ERROR, - "Reading of count field failed returned: %d\n", res); - return -EIO; + m->poll_waddr[i] = wait_address; + + if (!wait_address) { + dprintk(DEBUG_ERROR, "no wait_address\n"); + m->poll_waddr[i] = ERR_PTR(-EIO); + return; } - size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); - dprintk(DEBUG_MUX, "got a packet count: %d\n", size); + init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); + add_wait_queue(wait_address, &m->poll_wait[i]); +} + +/** + * v9fs_poll_mux - polls a mux and schedules read or write works if necessary + */ +static inline void v9fs_poll_mux(struct v9fs_mux_data *m) +{ + int n; - /* adjust for the four bytes of size */ - size -= 4; + if (m->err < 0) + return; - if (size > v9ses->maxdata) { - dprintk(DEBUG_ERROR, "packet too big: %d\n", size); - return -E2BIG; + n = m->trans->poll(m->trans, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + v9fs_mux_cancel(m, n); } - data = kmalloc(size, GFP_KERNEL); - if (!data) { - eprintk(KERN_WARNING, "out of memory\n"); - return -ENOMEM; + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + dprintk(DEBUG_MUX, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->rq); + } } - res = xread(v9ses, data, size); - if (res < size) { - dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n", - res); - kfree(data); - return res; + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + dprintk(DEBUG_MUX, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) + && !test_and_set_bit(Wworksched, &m->wsched)) { + dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->wq); + } } +} + +/** + * v9fs_poll_proc - polls all v9fs transports for new events and queues + * the appropriate work to the work queue + */ +static int v9fs_poll_proc(void *a) +{ + struct v9fs_mux_data *m, *mtmp; + struct v9fs_mux_poll_task *vpt; - /* we now have an in-memory string that is the reply. - * deserialize it. There is very little to go wrong at this point - * save for v9fs_alloc errors. - */ - res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata, - rcall, rcalllen); + vpt = a; + dprintk(DEBUG_MUX, "start %p %p\n", current, vpt); + allow_signal(SIGKILL); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + break; - kfree(data); + list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { + v9fs_poll_mux(m); + } - if (res < 0) - return res; + dprintk(DEBUG_MUX, "sleeping...\n"); + schedule_timeout(SCHED_TIMEOUT * HZ); + } + __set_current_state(TASK_RUNNING); + dprintk(DEBUG_MUX, "finish\n"); return 0; } /** - * v9fs_recv - receive an RPC response for a particular tag - * @v9ses: session info structure - * @req: RPC request structure - * + * v9fs_write_work - called when a transport can send some data */ - -static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +static void v9fs_write_work(void *a) { - int ret = 0; + int n, err; + struct v9fs_mux_data *m; + struct v9fs_req *req; - dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag); - ret = wait_event_interruptible(v9ses->read_wait, - ((v9ses->transport->status != Connected) || - (req->rcall != 0) || (req->err < 0) || - dprintcond(v9ses, req))); + m = a; - dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall); + if (m->err < 0) { + clear_bit(Wworksched, &m->wsched); + return; + } - spin_lock(&v9ses->muxlock); - list_del(&req->next); - spin_unlock(&v9ses->muxlock); + if (!m->wsize) { + if (list_empty(&m->unsent_req_list)) { + clear_bit(Wworksched, &m->wsched); + return; + } - if (req->err < 0) - return req->err; + spin_lock(&m->lock); + req = + list_entry(m->unsent_req_list.next, struct v9fs_req, + req_list); + list_move_tail(&req->req_list, &m->req_list); + m->wbuf = req->tcall->sdata; + m->wsize = req->tcall->size; + m->wpos = 0; + dump_data(m->wbuf, m->wsize); + spin_unlock(&m->lock); + } - if (v9ses->transport->status == Disconnected) - return -ECONNRESET; + dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); + clear_bit(Wpending, &m->wsched); + err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); + dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Wworksched, &m->wsched); + return; + } - return ret; -} + if (err <= 0) + goto error; -/** - * v9fs_send - send a 9P request - * @v9ses: session info structure - * @req: RPC request to send - * - */ + m->wpos += err; + if (m->wpos == m->wsize) + m->wpos = m->wsize = 0; + + if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLOUT) { + dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->wq); + } else + clear_bit(Wworksched, &m->wsched); + } else + clear_bit(Wworksched, &m->wsched); + + return; -static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) + error: + v9fs_mux_cancel(m, err); + clear_bit(Wworksched, &m->wsched); +} + +static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) { - int ret = -1; - void *data = NULL; - struct v9fs_fcall *tcall = req->tcall; + int ecode, tag; + struct v9fs_str *ename; - data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); - if (!data) - return -ENOMEM; + tag = req->tag; + if (req->rcall->id == RERROR && !req->err) { + ecode = req->rcall->params.rerror.errno; + ename = &req->rcall->params.rerror.error; - tcall->size = 0; /* enforce size recalculation */ - ret = - v9fs_serialize_fcall(v9ses, tcall, data, - v9ses->maxdata + V9FS_IOHDRSZ); - if (ret < 0) - goto free_data; + dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str); - spin_lock(&v9ses->muxlock); - list_add(&req->next, &v9ses->mux_fcalls); - spin_unlock(&v9ses->muxlock); + if (*m->extended) + req->err = -ecode; - dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag, - tcall->size); - ret = v9ses->transport->write(v9ses->transport, data, tcall->size); + if (!req->err) { + req->err = v9fs_errstr2errno(ename->str, ename->len); - if (ret != tcall->size) { - spin_lock(&v9ses->muxlock); - list_del(&req->next); - kfree(req->rcall); + if (!req->err) { /* string match failed */ + PRINT_FCALL_ERROR("unknown error", req->rcall); + } + + if (!req->err) + req->err = -ESERVERFAULT; + } + } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { + dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n", + req->tcall->id + 1, req->rcall->id); + if (!req->err) + req->err = -EIO; + } - spin_unlock(&v9ses->muxlock); - if (ret >= 0) - ret = -EREMOTEIO; + if (req->cb && req->err != ERREQFLUSH) { + dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n", + req->tcall, req->rcall); + + (*req->cb) (req->cba, req->tcall, req->rcall, req->err); + req->cb = NULL; } else - ret = 0; + kfree(req->rcall); - free_data: - kfree(data); - return ret; + v9fs_mux_put_tag(m, tag); + + wake_up(&m->equeue); + kfree(req); } /** - * v9fs_mux_rpc - send a request, receive a response - * @v9ses: session info structure - * @tcall: fcall to send - * @rcall: buffer to place response into - * + * v9fs_read_work - called when there is some data to be read from a transport */ - -long -v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall, - struct v9fs_fcall **rcall) +static void v9fs_read_work(void *a) { - int tid = -1; - struct v9fs_fcall *fcall = NULL; - struct v9fs_rpcreq req; - int ret = -1; - - if (!v9ses) - return -EINVAL; - - if (!v9ses->transport || v9ses->transport->status != Connected) - return -EIO; + int n, err; + struct v9fs_mux_data *m; + struct v9fs_req *req, *rptr, *rreq; + struct v9fs_fcall *rcall; + char *rbuf; + + m = a; + + if (m->err < 0) + return; + + rcall = NULL; + dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); + + if (!m->rcall) { + m->rcall = + kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL); + if (!m->rcall) { + err = -ENOMEM; + goto error; + } - if (rcall) - *rcall = NULL; + m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); + m->rpos = 0; + } - if (tcall->id != TVERSION) { - tid = v9fs_get_idpool(&v9ses->tidpool); - if (tid < 0) - return -ENOMEM; + clear_bit(Rpending, &m->wsched); + err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); + dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Rworksched, &m->wsched); + return; } - tcall->tag = tid; + if (err <= 0) + goto error; - req.tcall = tcall; - req.err = 0; - req.rcall = NULL; + m->rpos += err; + while (m->rpos > 4) { + n = le32_to_cpu(*(__le32 *) m->rbuf); + if (n >= m->msize) { + dprintk(DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; + } - ret = v9fs_send(v9ses, &req); + if (m->rpos < n) + break; - if (ret < 0) { - if (tcall->id != TVERSION) - v9fs_put_idpool(tid, &v9ses->tidpool); - dprintk(DEBUG_MUX, "error %d\n", ret); - return ret; - } + dump_data(m->rbuf, n); + err = + v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended); + if (err < 0) { + goto error; + } + + rcall = m->rcall; + rbuf = m->rbuf; + if (m->rpos > n) { + m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize, + GFP_KERNEL); + if (!m->rcall) { + err = -ENOMEM; + goto error; + } - ret = v9fs_recv(v9ses, &req); - - fcall = req.rcall; - - dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret); - if (ret == -ERESTARTSYS) { - if (v9ses->transport->status != Disconnected - && tcall->id != TFLUSH) { - unsigned long flags; - - dprintk(DEBUG_MUX, "flushing the tag: %d\n", - tcall->tag); - clear_thread_flag(TIF_SIGPENDING); - v9fs_t_flush(v9ses, tcall->tag); - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, - flags); - dprintk(DEBUG_MUX, "flushing done\n"); + m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); + memmove(m->rbuf, rbuf + n, m->rpos - n); + m->rpos -= n; + } else { + m->rcall = NULL; + m->rbuf = NULL; + m->rpos = 0; } - goto release_req; - } else if (ret < 0) - goto release_req; - - if (!fcall) - ret = -EIO; - else { - if (fcall->id == RERROR) { - ret = v9fs_errstr2errno(fcall->params.rerror.error); - if (ret == 0) { /* string match failed */ - if (fcall->params.rerror.errno) - ret = -(fcall->params.rerror.errno); - else - ret = -ESERVERFAULT; + dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id, + rcall->tag); + + req = NULL; + spin_lock(&m->lock); + list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { + if (rreq->tag == rcall->tag) { + req = rreq; + req->rcall = rcall; + list_del(&req->req_list); + spin_unlock(&m->lock); + process_request(m, req); + break; } - } else if (fcall->id != tcall->id + 1) { - dprintk(DEBUG_ERROR, - "fcall mismatch: expected %d, got %d\n", - tcall->id + 1, fcall->id); - ret = -EIO; + + } + + if (!req) { + spin_unlock(&m->lock); + if (err >= 0 && rcall->id != RFLUSH) + dprintk(DEBUG_ERROR, + "unexpected response mux %p id %d tag %d\n", + m, rcall->id, rcall->tag); + kfree(rcall); } } - release_req: - if (tcall->id != TVERSION) - v9fs_put_idpool(tid, &v9ses->tidpool); - if (rcall) - *rcall = fcall; - else - kfree(fcall); + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLIN) { + dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->rq); + } else + clear_bit(Rworksched, &m->wsched); + } else + clear_bit(Rworksched, &m->wsched); + + return; - return ret; + error: + v9fs_mux_cancel(m, err); + clear_bit(Rworksched, &m->wsched); } /** - * v9fs_mux_cancel_requests - cancels all pending requests + * v9fs_send_request - send 9P request + * The function can sleep until the request is scheduled for sending. + * The function can be interrupted. Return from the function is not + * a guarantee that the request is sent succesfully. Can return errors + * that can be retrieved by PTR_ERR macros. * - * @v9ses: session info structure - * @err: error code to return to the requests + * @m: mux data + * @tc: request to be sent + * @cb: callback function to call when response is received + * @cba: parameter to pass to the callback function */ -void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err) +static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, + struct v9fs_fcall *tc, + v9fs_mux_req_callback cb, void *cba) { - struct v9fs_rpcreq *rptr; - struct v9fs_rpcreq *rreq; + int n; + struct v9fs_req *req; - dprintk(DEBUG_MUX, " %d\n", err); - spin_lock(&v9ses->muxlock); - list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { - rreq->err = err; - } - spin_unlock(&v9ses->muxlock); - wake_up_all(&v9ses->read_wait); -} + dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, + tc, tc->id); + if (m->err < 0) + return ERR_PTR(m->err); -/** - * v9fs_recvproc - kproc to handle demultiplexing responses - * @data: session info structure - * - */ + req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); -static int v9fs_recvproc(void *data) -{ - struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data; - struct v9fs_fcall *rcall = NULL; - struct v9fs_rpcreq *rptr; - struct v9fs_rpcreq *req; - struct v9fs_rpcreq *rreq; - int err = 0; + if (tc->id == TVERSION) + n = V9FS_NOTAG; + else + n = v9fs_mux_get_tag(m); - allow_signal(SIGKILL); - set_current_state(TASK_INTERRUPTIBLE); - complete(&v9ses->proccmpl); - while (!kthread_should_stop() && err >= 0) { - req = rptr = rreq = NULL; - - rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); - if (!rcall) { - eprintk(KERN_ERR, "no memory for buffers\n"); - break; - } + if (n < 0) + return ERR_PTR(-ENOMEM); - err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ); - spin_lock(&v9ses->muxlock); - if (err < 0) { - list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { - rreq->err = err; - } - if(err != -ERESTARTSYS) - eprintk(KERN_ERR, - "Transport error while reading message %d\n", err); - } else { - list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { - if (rreq->tcall->tag == rcall->tag) { - req = rreq; - req->rcall = rcall; - break; - } - } - } + v9fs_set_tag(tc, n); - if (req && (req->tcall->id == TFLUSH)) { - struct v9fs_rpcreq *treq = NULL; - list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) { - if (treq->tcall->tag == - req->tcall->params.tflush.oldtag) { - list_del(&rptr->next); - kfree(treq->rcall); - break; - } + req->tag = n; + req->tcall = tc; + req->rcall = NULL; + req->err = 0; + req->cb = cb; + req->cba = cba; + + spin_lock(&m->lock); + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&m->lock); + + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + queue_work(v9fs_mux_wq, &m->wq); + + return req; +} + +static inline void +v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, + int err) +{ + v9fs_mux_req_callback cb; + int tag; + struct v9fs_mux_data *m; + struct v9fs_req *req, *rptr; + + m = a; + dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc, + rc, err, tc->params.tflush.oldtag); + + spin_lock(&m->lock); + cb = NULL; + tag = tc->params.tflush.oldtag; + list_for_each_entry_safe(req, rptr, &m->req_list, req_list) { + if (req->tag == tag) { + list_del(&req->req_list); + if (req->cb) { + cb = req->cb; + req->cb = NULL; + spin_unlock(&m->lock); + (*cb) (req->cba, req->tcall, req->rcall, + req->err); } + kfree(req); + wake_up(&m->equeue); + break; } + } - spin_unlock(&v9ses->muxlock); + if (!cb) + spin_unlock(&m->lock); - if (!req) { - if (err >= 0) - dprintk(DEBUG_ERROR, - "unexpected response: id %d tag %d\n", - rcall->id, rcall->tag); + v9fs_mux_put_tag(m, tag); + kfree(tc); + kfree(rc); +} - kfree(rcall); - } +static void +v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) +{ + struct v9fs_fcall *fc; - wake_up_all(&v9ses->read_wait); - set_current_state(TASK_INTERRUPTIBLE); + dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); + + fc = v9fs_create_tflush(req->tag); + v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); +} + +static void +v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) +{ + struct v9fs_mux_rpc *r; + + if (err == ERREQFLUSH) { + dprintk(DEBUG_MUX, "err req flush\n"); + return; } - v9ses->transport->close(v9ses->transport); + r = a; + dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req, + tc, rc, err); + r->rcall = rc; + r->err = err; + wake_up(&r->wqueue); +} - /* Inform all pending processes about the failure */ - wake_up_all(&v9ses->read_wait); +/** + * v9fs_mux_rpc - sends 9P request and waits until a response is available. + * The function can be interrupted. + * @m: mux data + * @tc: request to be sent + * @rc: pointer where a pointer to the response is stored + */ +int +v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, + struct v9fs_fcall **rc) +{ + int err; + unsigned long flags; + struct v9fs_req *req; + struct v9fs_mux_rpc r; + + r.err = 0; + r.rcall = NULL; + r.m = m; + init_waitqueue_head(&r.wqueue); + + if (rc) + *rc = NULL; + + req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); + if (IS_ERR(req)) { + err = PTR_ERR(req); + dprintk(DEBUG_MUX, "error %d\n", err); + return PTR_ERR(req); + } - if (signal_pending(current)) - complete(&v9ses->proccmpl); + r.req = req; + dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc, + req->tag, &r, req); + err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); + if (r.err < 0) + err = r.err; + + if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { + spin_lock(&m->lock); + req->tcall = NULL; + req->err = ERREQFLUSH; + spin_unlock(&m->lock); + + clear_thread_flag(TIF_SIGPENDING); + v9fs_mux_flush_request(m, req); + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } - dprintk(DEBUG_MUX, "recvproc: end\n"); - v9ses->recvproc = NULL; + if (!err) { + if (r.rcall) + dprintk(DEBUG_MUX, "got response id %d tag %d\n", + r.rcall->id, r.rcall->tag); + + if (rc) + *rc = r.rcall; + else + kfree(r.rcall); + } else { + kfree(r.rcall); + dprintk(DEBUG_MUX, "got error %d\n", err); + if (err > 0) + err = -EIO; + } - return err >= 0; + return err; } /** - * v9fs_mux_init - initialize multiplexer (spawn kproc) - * @v9ses: session info structure - * @dev_name: mount device information (to create unique kproc) - * + * v9fs_mux_rpcnb - sends 9P request without waiting for response. + * @m: mux data + * @tc: request to be sent + * @cb: callback function to be called when response arrives + * @cba: value to pass to the callback function */ +int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, + v9fs_mux_req_callback cb, void *a) +{ + int err; + struct v9fs_req *req; + + req = v9fs_send_request(m, tc, cb, a); + if (IS_ERR(req)) { + err = PTR_ERR(req); + dprintk(DEBUG_MUX, "error %d\n", err); + return PTR_ERR(req); + } + + dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); + return 0; +} -int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name) +/** + * v9fs_mux_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code + */ +void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) { - char procname[60]; - - strncpy(procname, dev_name, sizeof(procname)); - procname[sizeof(procname) - 1] = 0; - - init_waitqueue_head(&v9ses->read_wait); - init_completion(&v9ses->fcread); - init_completion(&v9ses->proccmpl); - spin_lock_init(&v9ses->muxlock); - INIT_LIST_HEAD(&v9ses->mux_fcalls); - v9ses->recvproc = NULL; - v9ses->curfcall = NULL; - - v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses, - "v9fs_recvproc %s", procname); - - if (IS_ERR(v9ses->recvproc)) { - eprintk(KERN_ERR, "cannot create receiving thread\n"); - v9fs_session_close(v9ses); - return -ECONNABORTED; + struct v9fs_req *req, *rtmp; + LIST_HEAD(cancel_list); + + dprintk(DEBUG_MUX, "mux %p err %d\n", m, err); + m->err = err; + spin_lock(&m->lock); + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + list_move(&req->req_list, &cancel_list); } + spin_unlock(&m->lock); - wake_up_process(v9ses->recvproc); - wait_for_completion(&v9ses->proccmpl); + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + list_del(&req->req_list); + if (!req->err) + req->err = err; - return 0; + if (req->cb) + (*req->cb) (req->cba, req->tcall, req->rcall, req->err); + else + kfree(req->rcall); + + kfree(req); + } + + wake_up(&m->equeue); +} + +static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m) +{ + int tag; + + tag = v9fs_get_idpool(&m->tidpool); + if (tag < 0) + return V9FS_NOTAG; + else + return (u16) tag; +} + +static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag) +{ + if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool)) + v9fs_put_idpool(tag, &m->tidpool); } diff --git a/fs/9p/mux.h b/fs/9p/mux.h index 4994cb10badf..9473b84f24b2 100644 --- a/fs/9p/mux.h +++ b/fs/9p/mux.h @@ -3,6 +3,7 @@ * * Multiplexer Definitions * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -23,19 +24,35 @@ * */ -/* structure to manage each RPC transaction */ +struct v9fs_mux_data; -struct v9fs_rpcreq { - struct v9fs_fcall *tcall; - struct v9fs_fcall *rcall; - int err; /* error code if response failed */ +/** + * v9fs_mux_req_callback - callback function that is called when the + * response of a request is received. The callback is called from + * a workqueue and shouldn't block. + * + * @a - the pointer that was specified when the request was send to be + * passed to the callback + * @tc - request call + * @rc - response call + * @err - error code (non-zero if error occured) + */ +typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc, + struct v9fs_fcall *rc, int err); + +int v9fs_mux_global_init(void); +void v9fs_mux_global_exit(void); - /* XXX - could we put scatter/gather buffers here? */ +struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, + unsigned char *extended); +void v9fs_mux_destroy(struct v9fs_mux_data *); - struct list_head next; -}; +int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc); +struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m); +int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc); +int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, + v9fs_mux_req_callback cb, void *a); -int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name); -long v9fs_mux_rpc(struct v9fs_session_info *v9ses, - struct v9fs_fcall *tcall, struct v9fs_fcall **rcall); -void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err); +void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush); +void v9fs_mux_cancel(struct v9fs_mux_data *m, int err); +int v9fs_errstr2errno(char *errstr, int len); diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c index 63b58ce98ff4..1a28ef97a3d1 100644 --- a/fs/9p/trans_fd.c +++ b/fs/9p/trans_fd.c @@ -3,6 +3,7 @@ * * File Descriptor Transport Layer * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data) return -ENOPROTOOPT; } - sema_init(&trans->writelock, 1); - sema_init(&trans->readlock, 1); - ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); if (!ts) @@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans) if (!trans) return; - trans->status = Disconnected; - ts = trans->priv; + ts = xchg(&trans->priv, NULL); if (!ts) return; + trans->status = Disconnected; if (ts->in_file) fput(ts->in_file); @@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans) kfree(ts); } +static unsigned int +v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt) +{ + int ret, n; + struct v9fs_trans_fd *ts; + mm_segment_t oldfs; + + if (!trans) + return -EIO; + + ts = trans->priv; + if (trans->status != Connected || !ts) + return -EIO; + + oldfs = get_fs(); + set_fs(get_ds()); + + if (!ts->in_file->f_op || !ts->in_file->f_op->poll) { + ret = -EIO; + goto end; + } + + ret = ts->in_file->f_op->poll(ts->in_file, pt); + + if (ts->out_file != ts->in_file) { + if (!ts->out_file->f_op || !ts->out_file->f_op->poll) { + ret = -EIO; + goto end; + } + + n = ts->out_file->f_op->poll(ts->out_file, pt); + + ret &= ~POLLOUT; + n &= ~POLLIN; + + ret |= n; + } + +end: + set_fs(oldfs); + return ret; +} + + struct v9fs_transport v9fs_trans_fd = { .init = v9fs_fd_init, .write = v9fs_fd_send, .read = v9fs_fd_recv, .close = v9fs_fd_close, + .poll = v9fs_fd_poll, }; diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c index 6a9a75d40f73..44e830697acb 100644 --- a/fs/9p/trans_sock.c +++ b/fs/9p/trans_sock.c @@ -3,6 +3,7 @@ * * Socket Transport Layer * + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de> @@ -36,6 +37,7 @@ #include <asm/uaccess.h> #include <linux/inet.h> #include <linux/idr.h> +#include <linux/file.h> #include "debug.h" #include "v9fs.h" @@ -45,6 +47,7 @@ struct v9fs_trans_sock { struct socket *s; + struct file *filp; }; /** @@ -57,41 +60,26 @@ struct v9fs_trans_sock { static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) { - struct msghdr msg; - struct kvec iov; - int result; - mm_segment_t oldfs; - struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; + int ret; + struct v9fs_trans_sock *ts; - if (trans->status == Disconnected) + if (!trans || trans->status == Disconnected) { + dprintk(DEBUG_ERROR, "disconnected ...\n"); return -EREMOTEIO; + } - result = -EINVAL; - - oldfs = get_fs(); - set_fs(get_ds()); - - iov.iov_base = v; - iov.iov_len = len; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; - msg.msg_flags = MSG_NOSIGNAL; + ts = trans->priv; - result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0); + if (!(ts->filp->f_flags & O_NONBLOCK)) + dprintk(DEBUG_ERROR, "blocking read ...\n"); - dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state); - set_fs(oldfs); - - if (result <= 0) { - if (result != -ERESTARTSYS) + ret = kernel_read(ts->filp, ts->filp->f_pos, v, len); + if (ret <= 0) { + if (ret != -ERESTARTSYS && ret != -EAGAIN) trans->status = Disconnected; } - return result; + return ret; } /** @@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) { - struct kvec iov; - struct msghdr msg; - int result = -1; + int ret; mm_segment_t oldfs; - struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; + struct v9fs_trans_sock *ts; - dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len); - dump_data(v, len); + if (!trans || trans->status == Disconnected) { + dprintk(DEBUG_ERROR, "disconnected ...\n"); + return -EREMOTEIO; + } + + ts = trans->priv; + if (!ts) { + dprintk(DEBUG_ERROR, "no transport ...\n"); + return -EREMOTEIO; + } - down(&trans->writelock); + if (!(ts->filp->f_flags & O_NONBLOCK)) + dprintk(DEBUG_ERROR, "blocking write ...\n"); oldfs = get_fs(); set_fs(get_ds()); - iov.iov_base = v; - iov.iov_len = len; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; - msg.msg_flags = MSG_NOSIGNAL; - result = kernel_sendmsg(ts->s, &msg, &iov, 1, len); + ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos); set_fs(oldfs); - if (result < 0) { - if (result != -ERESTARTSYS) + if (ret < 0) { + if (ret != -ERESTARTSYS) trans->status = Disconnected; } - up(&trans->writelock); - return result; + return ret; +} + +static unsigned int v9fs_sock_poll(struct v9fs_transport *trans, + struct poll_table_struct *pt) { + + int ret; + struct v9fs_trans_sock *ts; + mm_segment_t oldfs; + + if (!trans) { + dprintk(DEBUG_ERROR, "no transport\n"); + return -EIO; + } + + ts = trans->priv; + if (trans->status != Connected || !ts) { + dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status); + return -EIO; + } + + oldfs = get_fs(); + set_fs(get_ds()); + + if (!ts->filp->f_op || !ts->filp->f_op->poll) { + dprintk(DEBUG_ERROR, "no poll operation\n"); + ret = -EIO; + goto end; + } + + ret = ts->filp->f_op->poll(ts->filp, pt); + +end: + set_fs(oldfs); + return ret; } + /** * v9fs_tcp_init - initialize TCP socket * @v9ses: session information @@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) int rc = 0; struct v9fs_trans_sock *ts = NULL; struct v9fs_transport *trans = v9ses->transport; + int fd; - sema_init(&trans->writelock, 1); - sema_init(&trans->readlock, 1); + trans->status = Disconnected; ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); @@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) trans->priv = ts; ts->s = NULL; + ts->filp = NULL; if (!addr) return -EINVAL; @@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) return rc; } csocket->sk->sk_allocation = GFP_NOIO; + + fd = sock_map_fd(csocket); + if (fd < 0) { + sock_release(csocket); + kfree(ts); + trans->priv = NULL; + return fd; + } + ts->s = csocket; + ts->filp = fget(fd); + ts->filp->f_flags |= O_NONBLOCK; trans->status = Connected; return 0; @@ -203,7 +235,7 @@ static int v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { - int rc; + int rc, fd; struct socket *csocket; struct sockaddr_un sun_server; struct v9fs_transport *trans; @@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, csocket = NULL; trans = v9ses->transport; + trans->status = Disconnected; + if (strlen(dev_name) > UNIX_PATH_MAX) { eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", dev_name); @@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, trans->priv = ts; ts->s = NULL; - - sema_init(&trans->writelock, 1); - sema_init(&trans->readlock, 1); + ts->filp = NULL; sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, dev_name); @@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, return rc; } csocket->sk->sk_allocation = GFP_NOIO; + + fd = sock_map_fd(csocket); + if (fd < 0) { + sock_release(csocket); + kfree(ts); + trans->priv = NULL; + return fd; + } + ts->s = csocket; + ts->filp = fget(fd); + ts->filp->f_flags |= O_NONBLOCK; trans->status = Connected; return 0; @@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans) ts = trans->priv; - if ((ts) && (ts->s)) { - dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s); - sock_release(ts->s); + if ((ts) && (ts->filp)) { + fput(ts->filp); + ts->filp = NULL; ts->s = NULL; trans->status = Disconnected; - dprintk(DEBUG_TRANS, "socket closed\n"); } kfree(ts); @@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = { .write = v9fs_sock_send, .read = v9fs_sock_recv, .close = v9fs_sock_close, + .poll = v9fs_sock_poll, }; struct v9fs_transport v9fs_trans_unix = { @@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = { .write = v9fs_sock_send, .read = v9fs_sock_recv, .close = v9fs_sock_close, + .poll = v9fs_sock_poll, }; diff --git a/fs/9p/transport.h b/fs/9p/transport.h index 9e9cd418efd5..91fcdb94b361 100644 --- a/fs/9p/transport.h +++ b/fs/9p/transport.h @@ -3,6 +3,7 @@ * * Transport Definition * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -31,14 +32,13 @@ enum v9fs_transport_status { struct v9fs_transport { enum v9fs_transport_status status; - struct semaphore writelock; - struct semaphore readlock; void *priv; int (*init) (struct v9fs_session_info *, const char *, char *); int (*write) (struct v9fs_transport *, void *, int); int (*read) (struct v9fs_transport *, void *, int); void (*close) (struct v9fs_transport *); + unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *); }; extern struct v9fs_transport v9fs_trans_tcp; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 418c3743fdee..5250c428fc1f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -37,7 +37,6 @@ #include "v9fs_vfs.h" #include "transport.h" #include "mux.h" -#include "conv.h" /* TODO: sysfs or debugfs interface */ int v9fs_debug_level = 0; /* feature-rific global debug level */ @@ -213,7 +212,8 @@ retry: return -1; } - error = idr_get_new(&p->pool, NULL, &i); + /* no need to store exactly p, we just need something non-null */ + error = idr_get_new(&p->pool, p, &i); up(&p->lock); if (error == -EAGAIN) @@ -243,6 +243,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p) } /** + * v9fs_check_idpool - check if the specified id is available + * @id - id to check + * @p - pool + */ +int v9fs_check_idpool(int id, struct v9fs_idpool *p) +{ + return idr_find(&p->pool, id) != NULL; +} + +/** * v9fs_session_init - initialize session * @v9ses: session information structure * @dev_name: device being mounted @@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses, int n = 0; int newfid = -1; int retval = -EINVAL; + struct v9fs_str *version; v9ses->name = __getname(); if (!v9ses->name) @@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses, /* id pools that are session-dependent: FIDs and TIDs */ idr_init(&v9ses->fidpool.pool); init_MUTEX(&v9ses->fidpool.lock); - idr_init(&v9ses->tidpool.pool); - init_MUTEX(&v9ses->tidpool.lock); - switch (v9ses->proto) { case PROTO_TCP: @@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->shutdown = 0; v9ses->session_hung = 0; - if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) { + v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ, + &v9ses->extended); + + if (IS_ERR(v9ses->mux)) { + retval = PTR_ERR(v9ses->mux); + v9ses->mux = NULL; dprintk(DEBUG_ERROR, "problem initializing mux\n"); goto SessCleanUp; } @@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses, goto FreeFcall; } - /* Really should check for 9P1 and report error */ - if (!strcmp(fcall->params.rversion.version, "9P2000.u")) { + version = &fcall->params.rversion.version; + if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) { dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); v9ses->extended = 1; - } else { + } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) { dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); v9ses->extended = 0; + } else { + retval = -EREMOTEIO; + goto FreeFcall; } n = fcall->params.rversion.msize; @@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses, } if (v9ses->afid != ~0) { - if (v9fs_t_clunk(v9ses, v9ses->afid, NULL)) + if (v9fs_t_clunk(v9ses, v9ses->afid)) dprintk(DEBUG_ERROR, "clunk failed\n"); } @@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses, void v9fs_session_close(struct v9fs_session_info *v9ses) { - if (v9ses->recvproc) { - send_sig(SIGKILL, v9ses->recvproc, 1); - wait_for_completion(&v9ses->proccmpl); + if (v9ses->mux) { + v9fs_mux_destroy(v9ses->mux); + v9ses->mux = NULL; } - if (v9ses->transport) + if (v9ses->transport) { v9ses->transport->close(v9ses->transport); + kfree(v9ses->transport); + v9ses->transport = NULL; + } __putname(v9ses->name); __putname(v9ses->remotename); @@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) * and cancel all pending requests. */ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { + dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses); v9ses->transport->status = Disconnected; - v9fs_mux_cancel_requests(v9ses, -EIO); + v9fs_mux_cancel(v9ses->mux, -EIO); } extern int v9fs_error_init(void); @@ -433,11 +453,17 @@ extern int v9fs_error_init(void); static int __init init_v9fs(void) { + int ret; + v9fs_error_init(); printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); - return register_filesystem(&v9fs_fs_type); + ret = v9fs_mux_global_init(); + if (!ret) + ret = register_filesystem(&v9fs_fs_type); + + return ret; } /** @@ -447,6 +473,7 @@ static int __init init_v9fs(void) static void __exit exit_v9fs(void) { + v9fs_mux_global_exit(); unregister_filesystem(&v9fs_fs_type); } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 45dcef42bdd6..f337da7a0eec 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -57,24 +57,14 @@ struct v9fs_session_info { /* book keeping */ struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ - struct v9fs_idpool tidpool; /* The TID pool for transactions ids */ - /* transport information */ struct v9fs_transport *transport; + struct v9fs_mux_data *mux; int inprogress; /* session in progress => true */ int shutdown; /* session shutting down. no more attaches. */ unsigned char session_hung; - - /* mux private data */ - struct v9fs_fcall *curfcall; - wait_queue_head_t read_wait; - struct completion fcread; - struct completion proccmpl; - struct task_struct *recvproc; - - spinlock_t muxlock; - struct list_head mux_fcalls; + struct dentry *debugfs_dir; }; /* possible values of ->proto */ @@ -84,11 +74,14 @@ enum { PROTO_FD, }; +extern struct dentry *v9fs_debugfs_root; + int v9fs_session_init(struct v9fs_session_info *, const char *, char *); struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); void v9fs_session_close(struct v9fs_session_info *v9ses); int v9fs_get_idpool(struct v9fs_idpool *p); void v9fs_put_idpool(int id, struct v9fs_idpool *p); +int v9fs_check_idpool(int id, struct v9fs_idpool *p); void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_MAGIC 0x01021997 diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 2f2cea7ee3e7..c78502ad00ed 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -45,9 +45,8 @@ extern struct dentry_operations v9fs_dentry_operations; struct inode *v9fs_get_inode(struct super_block *sb, int mode); ino_t v9fs_qid2ino(struct v9fs_qid *qid); -void v9fs_mistat2inode(struct v9fs_stat *, struct inode *, - struct super_block *); +void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); -void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat); +void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat); void v9fs_dentry_release(struct dentry *); diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index a6aa947de0f9..2dd806dac9f1 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -40,7 +40,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "conv.h" #include "fid.h" /** @@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd) void v9fs_dentry_release(struct dentry *dentry) { + int err; + dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); if (dentry->d_fsdata != NULL) { struct list_head *fid_list = dentry->d_fsdata; struct v9fs_fid *temp = NULL; struct v9fs_fid *current_fid = NULL; - struct v9fs_fcall *fcall = NULL; list_for_each_entry_safe(current_fid, temp, fid_list, list) { - if (v9fs_t_clunk - (current_fid->v9ses, current_fid->fid, &fcall)) - dprintk(DEBUG_ERROR, "clunk failed: %s\n", - FCALL_ERROR(fcall)); + err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid); - v9fs_put_idpool(current_fid->fid, - ¤t_fid->v9ses->fidpool); + if (err < 0) + dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n", + err, dentry->d_iname); - kfree(fcall); v9fs_fid_destroy(current_fid); } diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 57a43b8feef5..ae6d032b9b59 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -37,8 +37,8 @@ #include "debug.h" #include "v9fs.h" #include "9p.h" -#include "v9fs_vfs.h" #include "conv.h" +#include "v9fs_vfs.h" #include "fid.h" /** @@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); struct v9fs_fid *file = filp->private_data; - unsigned int i, n; + unsigned int i, n, s; int fid = -1; int ret = 0; - struct v9fs_stat *mi = NULL; + struct v9fs_stat stat; int over = 0; dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name); fid = file->fid; - mi = kmalloc(v9ses->maxdata, GFP_KERNEL); - if (!mi) - return -ENOMEM; - if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { kfree(file->rdir_fcall); file->rdir_fcall = NULL; @@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) n = file->rdir_fcall->params.rread.count; i = file->rdir_fpos; while (i < n) { - int s = v9fs_deserialize_stat(v9ses, - file->rdir_fcall->params.rread.data + i, - n - i, mi, v9ses->maxdata); + s = v9fs_deserialize_stat( + file->rdir_fcall->params.rread.data + i, + n - i, &stat, v9ses->extended); if (s == 0) { dprintk(DEBUG_ERROR, - "error while deserializing mistat\n"); + "error while deserializing stat\n"); ret = -EIO; goto FreeStructs; } - over = filldir(dirent, mi->name, strlen(mi->name), - filp->f_pos, v9fs_qid2ino(&mi->qid), - dt_type(mi)); + over = filldir(dirent, stat.name.str, stat.name.len, + filp->f_pos, v9fs_qid2ino(&stat.qid), + dt_type(&stat)); if (over) { file->rdir_fpos = i; @@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) while (!over) { ret = v9fs_t_read(v9ses, fid, filp->f_pos, - v9ses->maxdata-V9FS_IOHDRSZ, &fcall); + v9ses->maxdata-V9FS_IOHDRSZ, &fcall); if (ret < 0) { dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", ret, fcall); @@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) n = ret; i = 0; while (i < n) { - int s = v9fs_deserialize_stat(v9ses, - fcall->params.rread.data + i, n - i, mi, - v9ses->maxdata); + s = v9fs_deserialize_stat(fcall->params.rread.data + i, + n - i, &stat, v9ses->extended); if (s == 0) { dprintk(DEBUG_ERROR, - "error while deserializing mistat\n"); + "error while deserializing stat\n"); return -EIO; } - over = filldir(dirent, mi->name, strlen(mi->name), - filp->f_pos, v9fs_qid2ino(&mi->qid), - dt_type(mi)); + over = filldir(dirent, stat.name.str, stat.name.len, + filp->f_pos, v9fs_qid2ino(&stat.qid), + dt_type(&stat)); if (over) { file->rdir_fcall = fcall; @@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) FreeStructs: kfree(fcall); - kfree(mi); return ret; } @@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) fid->fid); fidnum = fid->fid; - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); if (fidnum >= 0) { dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, fid->fid); - if (v9fs_t_clunk(v9ses, fidnum, NULL)) + if (v9fs_t_clunk(v9ses, fidnum)) dprintk(DEBUG_ERROR, "clunk failed\n"); - v9fs_put_idpool(fid->fid, &v9ses->fidpool); - kfree(fid->rdir_fcall); kfree(fid); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 89c849da8504..6852f0eb96ed 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/smp_lock.h> #include <linux/inet.h> +#include <linux/version.h> #include <linux/list.h> #include <asm/uaccess.h> #include <linux/idr.h> @@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) result = v9fs_t_open(v9ses, newfid, open_mode, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, - "open failed, open_mode 0x%x: %s\n", open_mode, - FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("open failed", fcall); kfree(fcall); return result; } @@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) return -ENOLCK; if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); invalidate_inode_pages(&inode->i_data); } @@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data, int result = -EIO; int rsize = 0; int total = 0; - char *buf; dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, (int)*offset); @@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data, if (v9fid->iounit != 0 && rsize > v9fid->iounit) rsize = v9fid->iounit; - buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL); - if (!buf) - return -ENOMEM; - do { if (count < rsize) rsize = count; - result = copy_from_user(buf, data, rsize); - if (result) { - dprintk(DEBUG_ERROR, "Problem copying from user\n"); - kfree(buf); - return -EFAULT; - } - - dump_data(buf, rsize); - result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall); + result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall); if (result < 0) { - eprintk(KERN_ERR, "error while writing: %s(%d)\n", - FCALL_ERROR(fcall), result); + PRINT_FCALL_ERROR("error while writing", fcall); kfree(fcall); - kfree(buf); return result; } else *offset += result; @@ -306,7 +289,6 @@ v9fs_file_write(struct file *filp, const char __user * data, total += result; } while (count); - kfree(buf); return total; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0ea965c3bb7d..a17b28854288 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -40,7 +40,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "conv.h" #include "fid.h" static struct inode_operations v9fs_dir_inode_operations; @@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) } /** - * v9fs_blank_mistat - helper function to setup a 9P stat structure + * v9fs_blank_wstat - helper function to setup a 9P stat structure * @v9ses: 9P session info (for determining extended mode) - * @mistat: structure to initialize + * @wstat: structure to initialize * */ static void -v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat) +v9fs_blank_wstat(struct v9fs_wstat *wstat) { - mistat->type = ~0; - mistat->dev = ~0; - mistat->qid.type = ~0; - mistat->qid.version = ~0; - *((long long *)&mistat->qid.path) = ~0; - mistat->mode = ~0; - mistat->atime = ~0; - mistat->mtime = ~0; - mistat->length = ~0; - mistat->name = mistat->data; - mistat->uid = mistat->data; - mistat->gid = mistat->data; - mistat->muid = mistat->data; - if (v9ses->extended) { - mistat->n_uid = ~0; - mistat->n_gid = ~0; - mistat->n_muid = ~0; - mistat->extension = mistat->data; - } - *mistat->data = 0; -} - -/** - * v9fs_mistat2unix - convert mistat to unix stat - * @mistat: Plan 9 metadata (mistat) structure - * @buf: unix metadata (stat) structure to populate - * @sb: superblock - * - */ - -static void -v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf, - struct super_block *sb) -{ - struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL; - - buf->st_nlink = 1; - - buf->st_atime = mistat->atime; - buf->st_mtime = mistat->mtime; - buf->st_ctime = mistat->mtime; - - buf->st_uid = (unsigned short)-1; - buf->st_gid = (unsigned short)-1; - - if (v9ses && v9ses->extended) { - /* TODO: string to uid mapping via user-space daemon */ - if (mistat->n_uid != -1) - sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid); - - if (mistat->n_gid != -1) - sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid); - } - - if (buf->st_uid == (unsigned short)-1) - buf->st_uid = v9ses->uid; - if (buf->st_gid == (unsigned short)-1) - buf->st_gid = v9ses->gid; - - buf->st_mode = p9mode2unixmode(v9ses, mistat->mode); - if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) { - char type = 0; - int major = -1; - int minor = -1; - sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); - switch (type) { - case 'c': - buf->st_mode &= ~S_IFBLK; - buf->st_mode |= S_IFCHR; - break; - case 'b': - break; - default: - dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", - type, mistat->extension); - }; - buf->st_rdev = MKDEV(major, minor); - } else - buf->st_rdev = 0; - - buf->st_size = mistat->length; - - buf->st_blksize = sb->s_blocksize; - buf->st_blocks = - (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits; + wstat->type = ~0; + wstat->dev = ~0; + wstat->qid.type = ~0; + wstat->qid.version = ~0; + *((long long *)&wstat->qid.path) = ~0; + wstat->mode = ~0; + wstat->atime = ~0; + wstat->mtime = ~0; + wstat->length = ~0; + wstat->name = NULL; + wstat->uid = NULL; + wstat->gid = NULL; + wstat->muid = NULL; + wstat->n_uid = ~0; + wstat->n_gid = ~0; + wstat->n_muid = ~0; + wstat->extension = NULL; } /** @@ -312,12 +243,12 @@ v9fs_create(struct inode *dir, struct inode *file_inode = NULL; struct v9fs_fcall *fcall = NULL; struct v9fs_qid qid; - struct stat newstat; int dirfidnum = -1; long newfid = -1; int result = 0; unsigned int iounit = 0; int wfidno = -1; + int err; perm = unixmode2p9mode(v9ses, perm); @@ -349,57 +280,64 @@ v9fs_create(struct inode *dir, result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("clone error", fcall); v9fs_put_idpool(newfid, &v9ses->fidpool); newfid = -1; goto CleanUpFid; } kfree(fcall); + fcall = NULL; result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name, perm, open_mode, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "create fails: %s(%d)\n", - FCALL_ERROR(fcall), result); - + PRINT_FCALL_ERROR("create fails", fcall); goto CleanUpFid; } iounit = fcall->params.rcreate.iounit; qid = fcall->params.rcreate.qid; kfree(fcall); + fcall = NULL; - fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); - dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); - if (!fid) { - result = -ENOMEM; - goto CleanUpFid; - } + if (!(perm&V9FS_DMDIR)) { + fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); + dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); + if (!fid) { + result = -ENOMEM; + goto CleanUpFid; + } - fid->qid = qid; - fid->iounit = iounit; + fid->qid = qid; + fid->iounit = iounit; + } else { + err = v9fs_t_clunk(v9ses, newfid); + newfid = -1; + if (err < 0) + dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err); + } /* walk to the newly created file and put the fid in the dentry */ wfidno = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { + if (wfidno < 0) { eprintk(KERN_WARNING, "no free fids available\n"); return -ENOSPC; } result = v9fs_t_walk(v9ses, dirfidnum, wfidno, - (char *) file_dentry->d_name.name, NULL); + (char *) file_dentry->d_name.name, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("clone error", fcall); v9fs_put_idpool(wfidno, &v9ses->fidpool); wfidno = -1; goto CleanUpFid; } + kfree(fcall); + fcall = NULL; if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) { - if (!v9fs_t_clunk(v9ses, newfid, &fcall)) { - v9fs_put_idpool(wfidno, &v9ses->fidpool); - } + v9fs_put_idpool(wfidno, &v9ses->fidpool); goto CleanUpFid; } @@ -409,62 +347,43 @@ v9fs_create(struct inode *dir, (perm & V9FS_DMDEVICE)) return 0; - result = v9fs_t_stat(v9ses, newfid, &fcall); + result = v9fs_t_stat(v9ses, wfidno, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall), - result); + PRINT_FCALL_ERROR("stat error", fcall); goto CleanUpFid; } - v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); - file_inode = v9fs_get_inode(sb, newstat.st_mode); + file_inode = v9fs_get_inode(sb, + p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode)); + if ((!file_inode) || IS_ERR(file_inode)) { dprintk(DEBUG_ERROR, "create inode failed\n"); result = -EBADF; goto CleanUpFid; } - v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb); + v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb); kfree(fcall); fcall = NULL; file_dentry->d_op = &v9fs_dentry_operations; d_instantiate(file_dentry, file_inode); - if (perm & V9FS_DMDIR) { - if (!v9fs_t_clunk(v9ses, newfid, &fcall)) - v9fs_put_idpool(newfid, &v9ses->fidpool); - else - dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n", - FCALL_ERROR(fcall)); - kfree(fcall); - fid->fidopen = 0; - fid->fidcreate = 0; - d_drop(file_dentry); - } - return 0; CleanUpFid: kfree(fcall); + fcall = NULL; if (newfid >= 0) { - if (!v9fs_t_clunk(v9ses, newfid, &fcall)) - v9fs_put_idpool(newfid, &v9ses->fidpool); - else - dprintk(DEBUG_ERROR, "clunk failed: %s\n", - FCALL_ERROR(fcall)); - - kfree(fcall); + err = v9fs_t_clunk(v9ses, newfid); + if (err < 0) + dprintk(DEBUG_ERROR, "clunk failed: %d\n", err); } if (wfidno >= 0) { - if (!v9fs_t_clunk(v9ses, wfidno, &fcall)) - v9fs_put_idpool(wfidno, &v9ses->fidpool); - else - dprintk(DEBUG_ERROR, "clunk failed: %s\n", - FCALL_ERROR(fcall)); - - kfree(fcall); + err = v9fs_t_clunk(v9ses, wfidno); + if (err < 0) + dprintk(DEBUG_ERROR, "clunk failed: %d\n", err); } return result; } @@ -509,10 +428,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) } result = v9fs_t_remove(v9ses, fid, &fcall); - if (result < 0) - dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n", - FCALL_ERROR(fcall), result); - else { + if (result < 0) { + PRINT_FCALL_ERROR("remove fails", fcall); + } else { v9fs_put_idpool(fid, &v9ses->fidpool); v9fs_fid_destroy(v9fid); } @@ -567,7 +485,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct v9fs_fid *fid; struct inode *inode; struct v9fs_fcall *fcall = NULL; - struct stat newstat; int dirfidnum = -1; int newfid = -1; int result = 0; @@ -620,8 +537,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; } - v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); - inode = v9fs_get_inode(sb, newstat.st_mode); + inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses, + fcall->params.rstat.stat.mode)); if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", @@ -631,7 +548,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; } - inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); + inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid); fid = v9fs_fid_create(dentry, v9ses, newfid, 0); if (fid == NULL) { @@ -640,10 +557,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; } - fid->qid = fcall->params.rstat.stat->qid; + fid->qid = fcall->params.rstat.stat.qid; dentry->d_op = &v9fs_dentry_operations; - v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb); + v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); d_add(dentry, inode); kfree(fcall); @@ -699,7 +616,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, v9fs_fid_lookup(old_dentry->d_parent); struct v9fs_fid *newdirfid = v9fs_fid_lookup(new_dentry->d_parent); - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + struct v9fs_wstat wstat; struct v9fs_fcall *fcall = NULL; int fid = -1; int olddirfidnum = -1; @@ -708,9 +625,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, dprintk(DEBUG_VFS, "\n"); - if (!mistat) - return -ENOMEM; - if ((!oldfid) || (!olddirfid) || (!newdirfid)) { dprintk(DEBUG_ERROR, "problem with arguments\n"); return -EBADF; @@ -734,33 +648,22 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto FreeFcallnBail; } - v9fs_blank_mistat(v9ses, mistat); + v9fs_blank_wstat(&wstat); + wstat.muid = v9ses->name; + wstat.name = (char *) new_dentry->d_name.name; - strcpy(mistat->data + 1, v9ses->name); - mistat->name = mistat->data + 1 + strlen(v9ses->name); - - if (new_dentry->d_name.len > - (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) { - dprintk(DEBUG_ERROR, "new name too long\n"); - goto FreeFcallnBail; - } - - strcpy(mistat->name, new_dentry->d_name.name); - retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall); + retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); FreeFcallnBail: - kfree(mistat); - if (retval < 0) - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("wstat error", fcall); kfree(fcall); return retval; } /** - * v9fs_vfs_getattr - retreive file metadata + * v9fs_vfs_getattr - retrieve file metadata * @mnt - mount information * @dentry - file to get attributes on * @stat - metadata structure to populate @@ -788,7 +691,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, if (err < 0) dprintk(DEBUG_ERROR, "stat error\n"); else { - v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode, + v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode, dentry->d_inode->i_sb); generic_fillattr(dentry->d_inode, stat); } @@ -809,57 +712,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); struct v9fs_fid *fid = v9fs_fid_lookup(dentry); struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + struct v9fs_wstat wstat; int res = -EPERM; dprintk(DEBUG_VFS, "\n"); - if (!mistat) - return -ENOMEM; - if (!fid) { dprintk(DEBUG_ERROR, "Couldn't find fid associated with dentry\n"); return -EBADF; } - v9fs_blank_mistat(v9ses, mistat); + v9fs_blank_wstat(&wstat); if (iattr->ia_valid & ATTR_MODE) - mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode); + wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode); if (iattr->ia_valid & ATTR_MTIME) - mistat->mtime = iattr->ia_mtime.tv_sec; + wstat.mtime = iattr->ia_mtime.tv_sec; if (iattr->ia_valid & ATTR_ATIME) - mistat->atime = iattr->ia_atime.tv_sec; + wstat.atime = iattr->ia_atime.tv_sec; if (iattr->ia_valid & ATTR_SIZE) - mistat->length = iattr->ia_size; + wstat.length = iattr->ia_size; if (v9ses->extended) { - char *ptr = mistat->data+1; - - if (iattr->ia_valid & ATTR_UID) { - mistat->uid = ptr; - ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid); - mistat->n_uid = iattr->ia_uid; - } + if (iattr->ia_valid & ATTR_UID) + wstat.n_uid = iattr->ia_uid; - if (iattr->ia_valid & ATTR_GID) { - mistat->gid = ptr; - ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid); - mistat->n_gid = iattr->ia_gid; - } + if (iattr->ia_valid & ATTR_GID) + wstat.n_gid = iattr->ia_gid; } - res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall); + res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); if (res < 0) - dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("wstat error", fcall); - kfree(mistat); kfree(fcall); - if (res >= 0) res = inode_setattr(dentry->d_inode, iattr); @@ -867,51 +757,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) } /** - * v9fs_mistat2inode - populate an inode structure with mistat info - * @mistat: Plan 9 metadata (mistat) structure + * v9fs_stat2inode - populate an inode structure with mistat info + * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate * @sb: superblock of filesystem * */ void -v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, - struct super_block *sb) +v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, + struct super_block *sb) { + int n; + char ext[32]; struct v9fs_session_info *v9ses = sb->s_fs_info; inode->i_nlink = 1; - inode->i_atime.tv_sec = mistat->atime; - inode->i_mtime.tv_sec = mistat->mtime; - inode->i_ctime.tv_sec = mistat->mtime; + inode->i_atime.tv_sec = stat->atime; + inode->i_mtime.tv_sec = stat->mtime; + inode->i_ctime.tv_sec = stat->mtime; - inode->i_uid = -1; - inode->i_gid = -1; + inode->i_uid = v9ses->uid; + inode->i_gid = v9ses->gid; if (v9ses->extended) { - /* TODO: string to uid mapping via user-space daemon */ - inode->i_uid = mistat->n_uid; - inode->i_gid = mistat->n_gid; - - if (mistat->n_uid == -1) - sscanf(mistat->uid, "%x", &inode->i_uid); - - if (mistat->n_gid == -1) - sscanf(mistat->gid, "%x", &inode->i_gid); + inode->i_uid = stat->n_uid; + inode->i_gid = stat->n_gid; } - if (inode->i_uid == -1) - inode->i_uid = v9ses->uid; - if (inode->i_gid == -1) - inode->i_gid = v9ses->gid; - - inode->i_mode = p9mode2unixmode(v9ses, mistat->mode); + inode->i_mode = p9mode2unixmode(v9ses, stat->mode); if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { char type = 0; int major = -1; int minor = -1; - sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); + + n = stat->extension.len; + if (n > sizeof(ext)-1) + n = sizeof(ext)-1; + memmove(ext, stat->extension.str, n); + ext[n] = 0; + sscanf(ext, "%c %u %u", &type, &major, &minor); switch (type) { case 'c': inode->i_mode &= ~S_IFBLK; @@ -920,14 +806,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, case 'b': break; default: - dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", - type, mistat->extension); + dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n", + type, stat->extension.len, stat->extension.str); }; inode->i_rdev = MKDEV(major, minor); } else inode->i_rdev = 0; - inode->i_size = mistat->length; + inode->i_size = stat->length; inode->i_blksize = sb->s_blocksize; inode->i_blocks = @@ -955,71 +841,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid) } /** - * v9fs_vfs_symlink - helper function to create symlinks - * @dir: directory inode containing symlink - * @dentry: dentry for symlink - * @symname: symlink data - * - * See 9P2000.u RFC for more information - * - */ - -static int -v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) -{ - int retval = -EPERM; - struct v9fs_fid *newfid; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - - dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, - symname); - - if (!mistat) - return -ENOMEM; - - if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeFcall; - } - - /* issue a create */ - retval = v9fs_create(dir, dentry, S_IFLNK, 0); - if (retval != 0) - goto FreeFcall; - - newfid = v9fs_fid_lookup(dentry); - - /* issue a twstat */ - v9fs_blank_mistat(v9ses, mistat); - strcpy(mistat->data + 1, symname); - mistat->extension = mistat->data + 1; - retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); - goto FreeFcall; - } - - kfree(fcall); - - if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { - dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", - FCALL_ERROR(fcall)); - goto FreeFcall; - } - - d_drop(dentry); /* FID - will this also clunk? */ - - FreeFcall: - kfree(mistat); - kfree(fcall); - - return retval; -} - -/** * v9fs_readlink - read a symlink's location (internal version) * @dentry: dentry for symlink * @buffer: buffer to load symlink location into @@ -1058,16 +879,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) if (!fcall) return -EIO; - if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) { + if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { retval = -EINVAL; goto FreeFcall; } /* copy extension buffer into buffer */ - if (strlen(fcall->params.rstat.stat->extension) < buflen) - buflen = strlen(fcall->params.rstat.stat->extension); + if (fcall->params.rstat.stat.extension.len < buflen) + buflen = fcall->params.rstat.stat.extension.len; - memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1); + memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); + buffer[buflen-1] = 0; retval = buflen; @@ -1157,6 +979,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void __putname(s); } +static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, + int mode, const char *extension) +{ + int err, retval; + struct v9fs_session_info *v9ses; + struct v9fs_fcall *fcall; + struct v9fs_fid *fid; + struct v9fs_wstat wstat; + + v9ses = v9fs_inode2v9ses(dir); + retval = -EPERM; + fcall = NULL; + + if (!v9ses->extended) { + dprintk(DEBUG_ERROR, "not extended\n"); + goto free_mem; + } + + /* issue a create */ + retval = v9fs_create(dir, dentry, mode, 0); + if (retval != 0) + goto free_mem; + + fid = v9fs_fid_get_created(dentry); + if (!fid) { + dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); + goto free_mem; + } + + /* issue a Twstat */ + v9fs_blank_wstat(&wstat); + wstat.muid = v9ses->name; + wstat.extension = (char *) extension; + retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); + if (retval < 0) { + PRINT_FCALL_ERROR("wstat error", fcall); + goto free_mem; + } + + err = v9fs_t_clunk(v9ses, fid->fid); + if (err < 0) { + dprintk(DEBUG_ERROR, "clunk failed: %d\n", err); + goto free_mem; + } + + d_drop(dentry); /* FID - will this also clunk? */ + +free_mem: + kfree(fcall); + return retval; +} + +/** + * v9fs_vfs_symlink - helper function to create symlinks + * @dir: directory inode containing symlink + * @dentry: dentry for symlink + * @symname: symlink data + * + * See 9P2000.u RFC for more information + * + */ + +static int +v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, + symname); + + return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); +} + /** * v9fs_vfs_link - create a hardlink * @old_dentry: dentry for file to link to @@ -1173,64 +1066,24 @@ static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - int retval = -EPERM; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); - struct v9fs_fid *newfid = NULL; - char *symname = __getname(); + int retval; + struct v9fs_fid *oldfid; + char *name; dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); - if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeMem; - } - - /* get fid of old_dentry */ - sprintf(symname, "hardlink(%d)\n", oldfid->fid); - - /* issue a create */ - retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0); - if (retval != 0) - goto FreeMem; - - newfid = v9fs_fid_lookup(dentry); - if (!newfid) { - dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); - goto FreeMem; - } - - /* issue a twstat */ - v9fs_blank_mistat(v9ses, mistat); - strcpy(mistat->data + 1, symname); - mistat->extension = mistat->data + 1; - retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; - } - - kfree(fcall); - - if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { - dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; + oldfid = v9fs_fid_lookup(old_dentry); + if (!oldfid) { + dprintk(DEBUG_ERROR, "can't find oldfid\n"); + return -EPERM; } - d_drop(dentry); /* FID - will this also clunk? */ - - kfree(fcall); - fcall = NULL; + name = __getname(); + sprintf(name, "hardlink(%d)\n", oldfid->fid); + retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); + __putname(name); - FreeMem: - kfree(mistat); - kfree(fcall); - __putname(symname); return retval; } @@ -1246,82 +1099,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, static int v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { - int retval = -EPERM; - struct v9fs_fid *newfid; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - char *symname = __getname(); + int retval; + char *name; dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); - if (!mistat) - return -ENOMEM; - - if (!new_valid_dev(rdev)) { - retval = -EINVAL; - goto FreeMem; - } - - if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeMem; - } - - /* issue a create */ - retval = v9fs_create(dir, dentry, mode, 0); - - if (retval != 0) - goto FreeMem; - - newfid = v9fs_fid_lookup(dentry); - if (!newfid) { - dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n"); - retval = -EINVAL; - goto FreeMem; - } + if (!new_valid_dev(rdev)) + return -EINVAL; + name = __getname(); /* build extension */ if (S_ISBLK(mode)) - sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev)); + sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISCHR(mode)) - sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev)); + sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISFIFO(mode)) - ; /* DO NOTHING */ + *name = 0; else { - retval = -EINVAL; - goto FreeMem; - } - - if (!S_ISFIFO(mode)) { - /* issue a twstat */ - v9fs_blank_mistat(v9ses, mistat); - strcpy(mistat->data + 1, symname); - mistat->extension = mistat->data + 1; - retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; - } + __putname(name); + return -EINVAL; } - /* need to update dcache so we show up */ - kfree(fcall); - - if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { - dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; - } - - d_drop(dentry); /* FID - will this also clunk? */ - - FreeMem: - kfree(mistat); - kfree(fcall); - __putname(symname); + retval = v9fs_vfs_mkspecial(dir, dentry, mode, name); + __putname(name); return retval; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 82c5b0084079..2c4fa75be025 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -44,7 +44,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "conv.h" #include "fid.h" static void v9fs_clear_inode(struct inode *); @@ -92,7 +91,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_op = &v9fs_super_ops; sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | - MS_NODIRATIME | MS_NOATIME; + MS_NOATIME; } /** @@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type dprintk(DEBUG_VFS, " \n"); - v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL); + v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) return ERR_PTR(-ENOMEM); if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); + kfree(v9ses); return ERR_PTR(newfid); } @@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type stat_result = v9fs_t_stat(v9ses, newfid, &fcall); if (stat_result < 0) { dprintk(DEBUG_ERROR, "stat error\n"); - v9fs_t_clunk(v9ses, newfid, NULL); + v9fs_t_clunk(v9ses, newfid); v9fs_put_idpool(newfid, &v9ses->fidpool); } else { /* Setup the Root Inode */ @@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type goto put_back_sb; } - root_fid->qid = fcall->params.rstat.stat->qid; + root_fid->qid = fcall->params.rstat.stat.qid; root->d_inode->i_ino = - v9fs_qid2ino(&fcall->params.rstat.stat->qid); - v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb); + v9fs_qid2ino(&fcall->params.rstat.stat.qid); + v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb); } kfree(fcall); diff --git a/fs/Kconfig b/fs/Kconfig index 382e3b2883d5..ef78e3a42d32 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -798,7 +798,7 @@ config PROC_KCORE config PROC_VMCORE bool "/proc/vmcore support (EXPERIMENTAL)" - depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP + depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP help Exports the dump image of crashed kernel in ELF format. diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 175b2e8177c1..f3d3d81eb7e9 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -1,6 +1,6 @@ config BINFMT_ELF bool "Kernel support for ELF binaries" - depends on MMU + depends on MMU && (BROKEN || !FRV) default y ---help--- ELF (Executable and Linkable Format) is a format for libraries and diff --git a/fs/Makefile b/fs/Makefile index 73676111ebbe..1db711319c80 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,11 +10,11 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o + ioprio.o pnode.o drop_caches.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o -obj-$(CONFIG_COMPAT) += compat.o +obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o nfsd-$(CONFIG_NFSD) := nfsctl.o obj-y += $(nfsd-y) $(nfsd-m) diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 9ebe881c6786..44d439cb69f4 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -244,10 +244,10 @@ affs_put_inode(struct inode *inode) pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); affs_free_prealloc(inode); if (atomic_read(&inode->i_count) == 1) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (inode->i_size != AFFS_I(inode)->mmu_private) affs_truncate(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 6682d6d7f294..5c61c24dab2a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) #endif /* determine how many magic numbers there should be in this page */ - latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT); + latter = dir->i_size - page_offset(page); if (latter >= PAGE_SIZE) qty = PAGE_SIZE; else diff --git a/fs/afs/volume.h b/fs/afs/volume.h index 1e691889c4c9..bfdcf19ba3f3 100644 --- a/fs/afs/volume.h +++ b/fs/afs/volume.h @@ -18,8 +18,6 @@ #include "kafsasyncd.h" #include "cache.h" -#define __packed __attribute__((packed)) - typedef enum { AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */ AFS_VLUPD_PENDING, /* on pending queue */ @@ -115,7 +113,7 @@ struct afs_volume struct cachefs_cookie *cache; /* caching cookie */ #endif afs_volid_t vid; /* volume ID */ - afs_voltype_t __packed type; /* type of volume */ + afs_voltype_t type; /* type of volume */ char type_force; /* force volume type (suppress R/O -> R/W) */ unsigned short nservers; /* number of server slots filled */ unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ @@ -29,7 +29,6 @@ #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/security.h> -#include <linux/rcuref.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/attr.c b/fs/attr.c index 67bcd9b14ea5..b34732506f1d 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok); int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if (ia_valid & ATTR_SIZE) { - if (attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error || (ia_valid == ATTR_SIZE)) - goto out; - } else { - /* - * We skipped the truncate but must still update - * timestamps - */ - ia_valid |= ATTR_MTIME|ATTR_CTIME; - } + + if (ia_valid & ATTR_SIZE && + attr->ia_size != i_size_read(inode)) { + int error = vmtruncate(inode, attr->ia_size); + if (error) + return error; } if (ia_valid & ATTR_UID) @@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr) inode->i_mode = mode; } mark_inode_dirty(inode); -out: - return error; + + return 0; } EXPORT_SYMBOL(inode_setattr); diff --git a/fs/autofs/root.c b/fs/autofs/root.c index a1ab1c0ed215..808134a5a2fa 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -229,9 +229,9 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); autofs_revalidate(dentry, nd); - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); /* * If we are still pending, check if we had to handle diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index fca83e28edcf..385bed09b0d8 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry) struct dentry *child; int ret = 0; - list_for_each_entry(child, &dentry->d_subdirs, d_child) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) if (simple_positive(child)) goto out; ret = 1; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index feb6ac427d05..dc39589df165 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -105,7 +105,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if (!simple_positive(dentry)) { @@ -138,7 +138,7 @@ resume: } if (this_parent != top) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -163,7 +163,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if (!simple_positive(dentry)) { @@ -199,7 +199,7 @@ cont: } if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb, /* On exit from the loop expire is set to a dgot dentry * to expire or it's NULL */ while ( next != &root->d_subdirs ) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if ( !simple_positive(dentry) ) { @@ -302,7 +302,7 @@ next: expired, (int)expired->d_name.len, expired->d_name.name); spin_lock(&dcache_lock); list_del(&expired->d_parent->d_subdirs); - list_add(&expired->d_parent->d_subdirs, &expired->d_child); + list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&dcache_lock); return expired; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 818b37be5153..2d3082854a29 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -91,7 +91,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - don`t care */ if (!simple_positive(dentry)) { @@ -117,7 +117,7 @@ resume: if (this_parent != sbi->root) { struct dentry *dentry = this_parent; - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; spin_unlock(&dcache_lock); DPRINTK("parent dentry %p %.*s", diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2a771ec66956..14aa70282e8c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -86,7 +86,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent, /* Update usage from here to top of tree, so that scan of top-level directories will give a useful result */ -static void autofs4_update_usage(struct dentry *dentry) +static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry) { struct dentry *top = dentry->d_sb->s_root; @@ -95,7 +95,7 @@ static void autofs4_update_usage(struct dentry *dentry) struct autofs_info *ino = autofs4_dentry_ino(dentry); if (ino) { - update_atime(dentry->d_inode); + touch_atime(mnt, dentry); ino->last_used = jiffies; } } @@ -143,7 +143,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f } while(1) { - struct dentry *de = list_entry(list, struct dentry, d_child); + struct dentry *de = list_entry(list, + struct dentry, d_u.d_child); if (!d_unhashed(de) && de->d_inode) { spin_unlock(&dcache_lock); @@ -288,10 +289,10 @@ out: return autofs4_dcache_readdir(file, dirent, filldir); } -static int try_to_fill_dentry(struct dentry *dentry, - struct super_block *sb, - struct autofs_sb_info *sbi, int flags) +static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags) { + struct super_block *sb = mnt->mnt_sb; + struct autofs_sb_info *sbi = autofs4_sbi(sb); struct autofs_info *de_info = autofs4_dentry_ino(dentry); int status = 0; @@ -366,7 +367,7 @@ static int try_to_fill_dentry(struct dentry *dentry, /* We don't update the usages for the autofs daemon itself, this is necessary for recursive autofs mounts */ if (!autofs4_oz_mode(sbi)) - autofs4_update_usage(dentry); + autofs4_update_usage(mnt, dentry); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; @@ -391,7 +392,7 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd) /* Pending dentry */ if (autofs4_ispending(dentry)) { if (!oz_mode) - status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags); + status = try_to_fill_dentry(nd->mnt, dentry, flags); return status; } @@ -408,14 +409,14 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd) dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); if (!oz_mode) - status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags); + status = try_to_fill_dentry(nd->mnt, dentry, flags); return status; } spin_unlock(&dcache_lock); /* Update the usage list */ if (!oz_mode) - autofs4_update_usage(dentry); + autofs4_update_usage(nd->mnt, dentry); return 1; } @@ -488,9 +489,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s d_add(dentry, NULL); if (dentry->d_op && dentry->d_op->d_revalidate) { - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); (dentry->d_op->d_revalidate)(dentry, nd); - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 72011826f0cb..f312103434d4 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -33,8 +33,6 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); -extern void dump_thread(struct pt_regs *, struct user *); - static struct linux_binfmt aout_format = { .module = THIS_MODULE, .load_binary = load_aout_binary, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f36f2210204f..a4f6f57d91aa 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); * If we don't support core dumping, then supply a NULL so we * don't even try. */ -#ifdef USE_ELF_CORE_DUMP +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file); #else #define elf_core_dump NULL @@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) { unsigned long map_addr; + unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); down_write(¤t->mm->mmap_sem); - map_addr = do_mmap(filep, ELF_PAGESTART(addr), - eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type, - eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); + /* mmap() will return -EINVAL if given a zero size, but a + * segment with zero filesize is perfectly valid */ + if (eppnt->p_filesz + pageoffset) + map_addr = do_mmap(filep, ELF_PAGESTART(addr), + eppnt->p_filesz + pageoffset, prot, type, + eppnt->p_offset - pageoffset); + else + map_addr = ELF_PAGESTART(addr); up_write(¤t->mm->mmap_sem); return(map_addr); } @@ -616,7 +622,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out_free_file; retval = -ENOMEM; - elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, + elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL); if (!elf_interpreter) goto out_free_file; @@ -1107,7 +1113,7 @@ out: * Note that some platforms still use traditional core dumps and not * the ELF core dump. Each platform can select it as appropriate. */ -#ifdef USE_ELF_CORE_DUMP +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) /* * ELF core dumper diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index e0344f69c79d..5b3076e8ee90 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -187,7 +187,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs goto error; /* read the name of the interpreter into memory */ - interpreter_name = (char *) kmalloc(phdr->p_filesz, GFP_KERNEL); + interpreter_name = kmalloc(phdr->p_filesz, GFP_KERNEL); if (!interpreter_name) goto error; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 9d6625829b99..b72dc31a0970 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -77,8 +77,6 @@ static int load_flat_shared_library(int id, struct lib_info *p); static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file); -extern void dump_thread(struct pt_regs *, struct user *); - static struct linux_binfmt flat_format = { .module = THIS_MODULE, .load_binary = load_flat_binary, diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 2568eb41cb3a..9ccc7d8275b8 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -588,11 +588,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, case 2: set_bit(Enabled, &e->flags); break; case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root); - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); kill_node(e); - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); dput(root); break; default: return res; @@ -622,7 +622,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, return PTR_ERR(e); root = dget(sb->s_root); - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); dentry = lookup_one_len(e->name, root, strlen(e->name)); err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -658,7 +658,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, out2: dput(dentry); out: - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); dput(root); if (err) { @@ -703,12 +703,12 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer, case 1: enabled = 0; break; case 2: enabled = 1; break; case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root); - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); while (!list_empty(&entries)) kill_node(list_entry(entries.next, Node, list)); - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); dput(root); default: return res; } @@ -126,6 +126,7 @@ static void bio_fs_destructor(struct bio *bio) inline void bio_init(struct bio *bio) { bio->bi_next = NULL; + bio->bi_bdev = NULL; bio->bi_flags = 1 << BIO_UPTODATE; bio->bi_rw = 0; bio->bi_vcnt = 0; @@ -325,10 +326,31 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page if (unlikely(bio_flagged(bio, BIO_CLONED))) return 0; - if (bio->bi_vcnt >= bio->bi_max_vecs) + if (((bio->bi_size + len) >> 9) > max_sectors) return 0; - if (((bio->bi_size + len) >> 9) > max_sectors) + /* + * For filesystems with a blocksize smaller than the pagesize + * we will often be called with the same page as last time and + * a consecutive offset. Optimize this special case. + */ + if (bio->bi_vcnt > 0) { + struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; + + if (page == prev->bv_page && + offset == prev->bv_offset + prev->bv_len) { + prev->bv_len += len; + if (q->merge_bvec_fn && + q->merge_bvec_fn(q, bio, prev) < len) { + prev->bv_len -= len; + return 0; + } + + goto done; + } + } + + if (bio->bi_vcnt >= bio->bi_max_vecs) return 0; /* @@ -382,6 +404,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page bio->bi_vcnt++; bio->bi_phys_segments++; bio->bi_hw_segments++; + done: bio->bi_size += len; return len; } diff --git a/fs/block_dev.c b/fs/block_dev.c index e0df94c37b7e..6e50346fb1ee 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -202,7 +202,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) loff_t size; loff_t retval; - down(&bd_inode->i_sem); + mutex_lock(&bd_inode->i_mutex); size = i_size_read(bd_inode); switch (origin) { @@ -219,7 +219,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) } retval = offset; } - up(&bd_inode->i_sem); + mutex_unlock(&bd_inode->i_mutex); return retval; } diff --git a/fs/buffer.c b/fs/buffer.c index 5287be18633b..6466bc8a3dc7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -153,14 +153,8 @@ int sync_blockdev(struct block_device *bdev) { int ret = 0; - if (bdev) { - int err; - - ret = filemap_fdatawrite(bdev->bd_inode->i_mapping); - err = filemap_fdatawait(bdev->bd_inode->i_mapping); - if (!ret) - ret = err; - } + if (bdev) + ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); return ret; } EXPORT_SYMBOL(sync_blockdev); @@ -358,11 +352,11 @@ static long do_fsync(unsigned int fd, int datasync) * We need to protect against concurrent writers, * which could cause livelocks in fsync_buffers_list */ - down(&mapping->host->i_sem); + mutex_lock(&mapping->host->i_mutex); err = file->f_op->fsync(file, file->f_dentry, datasync); if (!ret) ret = err; - up(&mapping->host->i_sem); + mutex_unlock(&mapping->host->i_mutex); err = filemap_fdatawait(mapping); if (!ret) ret = err; @@ -1768,7 +1762,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, * handle that here by just cleaning them. */ - block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); head = page_buffers(page); bh = head; @@ -2160,11 +2154,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block) * truncates. Uses prepare/commit_write to allow the filesystem to * deal with the hole. */ -int generic_cont_expand(struct inode *inode, loff_t size) +static int __generic_cont_expand(struct inode *inode, loff_t size, + pgoff_t index, unsigned int offset) { struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index, offset, limit; + unsigned long limit; int err; err = -EFBIG; @@ -2176,24 +2171,24 @@ int generic_cont_expand(struct inode *inode, loff_t size) if (size > inode->i_sb->s_maxbytes) goto out; - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ - - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - offset++; - } - index = size >> PAGE_CACHE_SHIFT; err = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) goto out; err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (!err) { - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + if (err) { + /* + * ->prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. + */ + unlock_page(page); + page_cache_release(page); + vmtruncate(inode, inode->i_size); + goto out; } + + err = mapping->a_ops->commit_write(NULL, page, offset, offset); + unlock_page(page); page_cache_release(page); if (err > 0) @@ -2202,6 +2197,36 @@ out: return err; } +int generic_cont_expand(struct inode *inode, loff_t size) +{ + pgoff_t index; + unsigned int offset; + + offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ + + /* ugh. in prepare/commit_write, if from==to==start of block, we + ** skip the prepare. make sure we never send an offset for the start + ** of a block + */ + if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { + /* caller must handle this extra byte. */ + offset++; + } + index = size >> PAGE_CACHE_SHIFT; + + return __generic_cont_expand(inode, size, index, offset); +} + +int generic_cont_expand_simple(struct inode *inode, loff_t size) +{ + loff_t pos = size - 1; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; + + /* prepare/commit_write can handle even if from==to==start of block. */ + return __generic_cont_expand(inode, size, index, offset); +} + /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. @@ -2313,7 +2338,7 @@ int generic_commit_write(struct file *file, struct page *page, __block_commit_write(inode,page,from,to); /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_sem. + * cannot change under us because we hold i_mutex. */ if (pos > inode->i_size) { i_size_write(inode, pos); @@ -2610,7 +2635,7 @@ int block_truncate_page(struct address_space *mapping, pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize; - pgoff_t iblock; + sector_t iblock; unsigned length, pos; struct inode *inode = mapping->host; struct page *page; @@ -2626,7 +2651,7 @@ int block_truncate_page(struct address_space *mapping, return 0; length = blocksize - length; - iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); page = grab_cache_page(mapping, index); err = -ENOMEM; @@ -3145,6 +3170,7 @@ EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand); +EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); EXPORT_SYMBOL(ll_rw_block); diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h index decd138f14d4..da2ad5b451ac 100644 --- a/fs/cifs/cifs_uniupr.h +++ b/fs/cifs/cifs_uniupr.h @@ -242,7 +242,7 @@ static signed char UniCaseRangeLff20[27] = { /* * Lower Case Range */ -const static struct UniCaseRange CifsUniLowerRange[] = { +static const struct UniCaseRange CifsUniLowerRange[] = { 0x0380, 0x03ab, UniCaseRangeL0380, 0x0400, 0x042f, UniCaseRangeL0400, 0x0490, 0x04cb, UniCaseRangeL0490, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2a13a2bac8f1..e10213b7541e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -860,9 +860,9 @@ static int cifs_oplock_thread(void * dummyarg) DeleteOplockQEntry(oplock_item); /* can not grab inode sem here since it would deadlock when oplock received on delete - since vfs_unlink holds the i_sem across + since vfs_unlink holds the i_mutex across the call */ - /* down(&inode->i_sem);*/ + /* mutex_lock(&inode->i_mutex);*/ if (S_ISREG(inode->i_mode)) { rc = filemap_fdatawrite(inode->i_mapping); if(CIFS_I(inode)->clientCanCacheRead == 0) { @@ -871,7 +871,7 @@ static int cifs_oplock_thread(void * dummyarg) } } else rc = 0; - /* up(&inode->i_sem);*/ + /* mutex_unlock(&inode->i_mutex);*/ if (rc) CIFS_I(inode)->write_behind_rc = rc; cFYI(1,("Oplock flush inode %p rc %d",inode,rc)); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 14a1c72ced92..5ade53d7bca8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, if (file->f_dentry->d_inode->i_mapping) { /* BB no need to lock inode until after invalidate since namei code should already have it locked? */ - filemap_fdatawrite(file->f_dentry->d_inode->i_mapping); - filemap_fdatawait(file->f_dentry->d_inode->i_mapping); + filemap_write_and_wait(file->f_dentry->d_inode->i_mapping); } cFYI(1, ("invalidating remote inode since open detected it " "changed")); @@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file, pCifsInode = CIFS_I(inode); if (pCifsInode) { if (can_flush) { - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); /* temporarily disable caching while we go to server to get inode info */ pCifsInode->clientCanCacheAll = FALSE; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 411c1f7f84da..3ebce9430f4a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1040,9 +1040,9 @@ int cifs_revalidate(struct dentry *direntry) } /* can not grab this sem since kernel filesys locking documentation - indicates i_sem may be taken by the kernel on lookup and rename - which could deadlock if we grab the i_sem here as well */ -/* down(&direntry->d_inode->i_sem);*/ + indicates i_mutex may be taken by the kernel on lookup and rename + which could deadlock if we grab the i_mutex here as well */ +/* mutex_lock(&direntry->d_inode->i_mutex);*/ /* need to write out dirty pages here */ if (direntry->d_inode->i_mapping) { /* do we need to lock inode until after invalidate completes @@ -1066,7 +1066,7 @@ int cifs_revalidate(struct dentry *direntry) } } } -/* up(&direntry->d_inode->i_sem); */ +/* mutex_unlock(&direntry->d_inode->i_mutex); */ kfree(full_path); FreeXid(xid); @@ -1148,8 +1148,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* BB check if we need to refresh inode from server now ? BB */ /* need to flush data before changing file size on server */ - filemap_fdatawrite(direntry->d_inode->i_mapping); - filemap_fdatawait(direntry->d_inode->i_mapping); + filemap_write_and_wait(direntry->d_inode->i_mapping); if (attrs->ia_valid & ATTR_SIZE) { /* To avoid spurious oplock breaks from server, in the case of diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 80072fd9b7fa..c607d923350a 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag) spin_lock(&dcache_lock); list_for_each(child, &parent->d_subdirs) { - de = list_entry(child, struct dentry, d_child); + de = list_entry(child, struct dentry, d_u.d_child); /* don't know what to do with negative dentries */ if ( ! de->d_inode ) continue; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 2391766e9c7c..8f1a517f8b4e 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -453,7 +453,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir) coda_vfs_stat.readdir++; host_inode = host_file->f_dentry->d_inode; - down(&host_inode->i_sem); + mutex_lock(&host_inode->i_mutex); host_file->f_pos = coda_file->f_pos; if (!host_file->f_op->readdir) { @@ -475,7 +475,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir) } out: coda_file->f_pos = host_file->f_pos; - up(&host_inode->i_sem); + mutex_unlock(&host_inode->i_mutex); return ret; } diff --git a/fs/coda/file.c b/fs/coda/file.c index e6bc022568f3..30b4630bd735 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -77,14 +77,14 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo return -EINVAL; host_inode = host_file->f_dentry->d_inode; - down(&coda_inode->i_sem); + mutex_lock(&coda_inode->i_mutex); ret = host_file->f_op->write(host_file, buf, count, ppos); coda_inode->i_size = host_inode->i_size; coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; - up(&coda_inode->i_sem); + mutex_unlock(&coda_inode->i_mutex); return ret; } @@ -272,9 +272,9 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) if (host_file->f_op && host_file->f_op->fsync) { host_dentry = host_file->f_dentry; host_inode = host_dentry->d_inode; - down(&host_inode->i_sem); + mutex_lock(&host_inode->i_mutex); err = host_file->f_op->fsync(host_file, host_dentry, datasync); - up(&host_inode->i_sem); + mutex_unlock(&host_inode->i_mutex); } if ( !err && !datasync ) { diff --git a/fs/compat.c b/fs/compat.c index 55ac0324aaf1..271b75d1597f 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -494,9 +494,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, ret = sys_fcntl(fd, cmd, (unsigned long)&f); set_fs(old_fs); if (cmd == F_GETLK && ret == 0) { - if ((f.l_start >= COMPAT_OFF_T_MAX) || - ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX)) + /* GETLK was successfule and we need to return the data... + * but it needs to fit in the compat structure. + * l_start shouldn't be too big, unless the original + * start + end is greater than COMPAT_OFF_T_MAX, in which + * case the app was asking for trouble, so we return + * -EOVERFLOW in that case. + * l_len could be too big, in which case we just truncate it, + * and only allow the app to see that part of the conflicting + * lock that might make sense to it anyway + */ + + if (f.l_start > COMPAT_OFF_T_MAX) ret = -EOVERFLOW; + if (f.l_len > COMPAT_OFF_T_MAX) + f.l_len = COMPAT_OFF_T_MAX; if (ret == 0) ret = put_compat_flock(&f, compat_ptr(arg)); } @@ -515,9 +527,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, (unsigned long)&f); set_fs(old_fs); if (cmd == F_GETLK64 && ret == 0) { - if ((f.l_start >= COMPAT_LOFF_T_MAX) || - ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX)) + /* need to return lock information - see above for commentary */ + if (f.l_start > COMPAT_LOFF_T_MAX) ret = -EOVERFLOW; + if (f.l_len > COMPAT_LOFF_T_MAX) + f.l_len = COMPAT_LOFF_T_MAX; if (ret == 0) ret = put_compat_flock64(&f, compat_ptr(arg)); } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 43a2508ac696..890bc30fbe20 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -10,7 +10,6 @@ * ioctls. */ -#ifdef INCLUDES #include <linux/config.h> #include <linux/types.h> #include <linux/compat.h> @@ -81,13 +80,9 @@ #include <linux/capi.h> #include <scsi/scsi.h> -/* Ugly hack. */ -#undef __KERNEL__ #include <scsi/scsi_ioctl.h> -#define __KERNEL__ #include <scsi/sg.h> -#include <asm/types.h> #include <asm/uaccess.h> #include <linux/ethtool.h> #include <linux/mii.h> @@ -95,7 +90,6 @@ #include <linux/watchdog.h> #include <linux/dm-ioctl.h> -#include <asm/module.h> #include <linux/soundcard.h> #include <linux/lp.h> #include <linux/ppdev.h> @@ -128,11 +122,6 @@ #include <linux/dvb/frontend.h> #include <linux/dvb/video.h> -#undef INCLUDES -#endif - -#ifdef CODE - /* Aiee. Someone does not find a difference between int and long */ #define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) #define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) @@ -148,6 +137,12 @@ #define EXT2_IOC32_GETVERSION _IOR('v', 1, int) #define EXT2_IOC32_SETVERSION _IOW('v', 2, int) +static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, + unsigned long arg, struct file *f) +{ + return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); +} + static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg) { mm_segment_t old_fs = get_fs(); @@ -207,244 +202,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); } -struct video_tuner32 { - compat_int_t tuner; - char name[32]; - compat_ulong_t rangelow, rangehigh; - u32 flags; /* It is really u32 in videodev.h */ - u16 mode, signal; -}; - -static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up) -{ - int i; - - if(get_user(kp->tuner, &up->tuner)) - return -EFAULT; - for(i = 0; i < 32; i++) - __get_user(kp->name[i], &up->name[i]); - __get_user(kp->rangelow, &up->rangelow); - __get_user(kp->rangehigh, &up->rangehigh); - __get_user(kp->flags, &up->flags); - __get_user(kp->mode, &up->mode); - __get_user(kp->signal, &up->signal); - return 0; -} - -static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up) -{ - int i; - - if(put_user(kp->tuner, &up->tuner)) - return -EFAULT; - for(i = 0; i < 32; i++) - __put_user(kp->name[i], &up->name[i]); - __put_user(kp->rangelow, &up->rangelow); - __put_user(kp->rangehigh, &up->rangehigh); - __put_user(kp->flags, &up->flags); - __put_user(kp->mode, &up->mode); - __put_user(kp->signal, &up->signal); - return 0; -} - -struct video_buffer32 { - compat_caddr_t base; - compat_int_t height, width, depth, bytesperline; -}; - -static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up) -{ - u32 tmp; - - if (get_user(tmp, &up->base)) - return -EFAULT; - - /* This is actually a physical address stored - * as a void pointer. - */ - kp->base = (void *)(unsigned long) tmp; - - __get_user(kp->height, &up->height); - __get_user(kp->width, &up->width); - __get_user(kp->depth, &up->depth); - __get_user(kp->bytesperline, &up->bytesperline); - return 0; -} - -static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up) -{ - u32 tmp = (u32)((unsigned long)kp->base); - - if(put_user(tmp, &up->base)) - return -EFAULT; - __put_user(kp->height, &up->height); - __put_user(kp->width, &up->width); - __put_user(kp->depth, &up->depth); - __put_user(kp->bytesperline, &up->bytesperline); - return 0; -} - -struct video_clip32 { - s32 x, y, width, height; /* Its really s32 in videodev.h */ - compat_caddr_t next; -}; - -struct video_window32 { - u32 x, y, width, height, chromakey, flags; - compat_caddr_t clips; - compat_int_t clipcount; -}; - -/* You get back everything except the clips... */ -static int put_video_window32(struct video_window *kp, struct video_window32 __user *up) -{ - if(put_user(kp->x, &up->x)) - return -EFAULT; - __put_user(kp->y, &up->y); - __put_user(kp->width, &up->width); - __put_user(kp->height, &up->height); - __put_user(kp->chromakey, &up->chromakey); - __put_user(kp->flags, &up->flags); - __put_user(kp->clipcount, &up->clipcount); - return 0; -} - -#define VIDIOCGTUNER32 _IOWR('v',4, struct video_tuner32) -#define VIDIOCSTUNER32 _IOW('v',5, struct video_tuner32) -#define VIDIOCGWIN32 _IOR('v',9, struct video_window32) -#define VIDIOCSWIN32 _IOW('v',10, struct video_window32) -#define VIDIOCGFBUF32 _IOR('v',11, struct video_buffer32) -#define VIDIOCSFBUF32 _IOW('v',12, struct video_buffer32) -#define VIDIOCGFREQ32 _IOR('v',14, u32) -#define VIDIOCSFREQ32 _IOW('v',15, u32) - -enum { - MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip) -}; - -static int do_set_window(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct video_window32 __user *up = compat_ptr(arg); - struct video_window __user *vw; - struct video_clip __user *p; - int nclips; - u32 n; - - if (get_user(nclips, &up->clipcount)) - return -EFAULT; - - /* Peculiar interface... */ - if (nclips < 0) - nclips = VIDEO_CLIPMAP_SIZE; - - if (nclips > MaxClips) - return -ENOMEM; - - vw = compat_alloc_user_space(sizeof(struct video_window) + - nclips * sizeof(struct video_clip)); - - p = nclips ? (struct video_clip __user *)(vw + 1) : NULL; - - if (get_user(n, &up->x) || put_user(n, &vw->x) || - get_user(n, &up->y) || put_user(n, &vw->y) || - get_user(n, &up->width) || put_user(n, &vw->width) || - get_user(n, &up->height) || put_user(n, &vw->height) || - get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) || - get_user(n, &up->flags) || put_user(n, &vw->flags) || - get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) || - get_user(n, &up->clips) || put_user(p, &vw->clips)) - return -EFAULT; - - if (nclips) { - struct video_clip32 __user *u = compat_ptr(n); - int i; - if (!u) - return -EINVAL; - for (i = 0; i < nclips; i++, u++, p++) { - s32 v; - if (get_user(v, &u->x) || - put_user(v, &p->x) || - get_user(v, &u->y) || - put_user(v, &p->y) || - get_user(v, &u->width) || - put_user(v, &p->width) || - get_user(v, &u->height) || - put_user(v, &p->height) || - put_user(NULL, &p->next)) - return -EFAULT; - } - } - - return sys_ioctl(fd, VIDIOCSWIN, (unsigned long)p); -} - -static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - union { - struct video_tuner vt; - struct video_buffer vb; - struct video_window vw; - unsigned long vx; - } karg; - mm_segment_t old_fs = get_fs(); - void __user *up = compat_ptr(arg); - int err = 0; - - /* First, convert the command. */ - switch(cmd) { - case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break; - case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break; - case VIDIOCGWIN32: cmd = VIDIOCGWIN; break; - case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break; - case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break; - case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break; - case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break; - }; - - switch(cmd) { - case VIDIOCSTUNER: - case VIDIOCGTUNER: - err = get_video_tuner32(&karg.vt, up); - break; - - case VIDIOCSFBUF: - err = get_video_buffer32(&karg.vb, up); - break; - - case VIDIOCSFREQ: - err = get_user(karg.vx, (u32 __user *)up); - break; - }; - if(err) - goto out; - - set_fs(KERNEL_DS); - err = sys_ioctl(fd, cmd, (unsigned long)&karg); - set_fs(old_fs); - - if(err == 0) { - switch(cmd) { - case VIDIOCGTUNER: - err = put_video_tuner32(&karg.vt, up); - break; - - case VIDIOCGWIN: - err = put_video_window32(&karg.vw, up); - break; - - case VIDIOCGFBUF: - err = put_video_buffer32(&karg.vb, up); - break; - - case VIDIOCGFREQ: - err = put_user(((u32)karg.vx), (u32 __user *)up); - break; - }; - } -out: - return err; -} - struct compat_dmx_event { dmx_event_t event; compat_time_t timeStamp; @@ -2713,6 +2470,49 @@ static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg return -EINVAL; } +#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) +#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t) +#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) +#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t) + +static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg) +{ + mm_segment_t oldfs = get_fs(); + compat_ulong_t val32; + unsigned long kval; + int ret; + + switch (cmd) { + case RTC_IRQP_READ32: + case RTC_EPOCH_READ32: + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ? + RTC_IRQP_READ : RTC_EPOCH_READ, + (unsigned long)&kval); + set_fs(oldfs); + if (ret) + return ret; + val32 = kval; + return put_user(val32, (unsigned int __user *)arg); + case RTC_IRQP_SET32: + case RTC_EPOCH_SET32: + ret = get_user(val32, (unsigned int __user *)arg); + if (ret) + return ret; + kval = val32; + + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, (cmd == RTC_IRQP_SET32) ? + RTC_IRQP_SET : RTC_EPOCH_SET, + (unsigned long)&kval); + set_fs(oldfs); + return ret; + default: + /* unreached */ + return -ENOIOCTLCMD; + } +} + #if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE) struct ncp_ioctl_request_32 { u32 function; @@ -2900,10 +2700,20 @@ static int do_ncp_setprivatedata(unsigned int fd, unsigned int cmd, unsigned lon } #endif -#undef CODE -#endif +#define HANDLE_IOCTL(cmd,handler) \ + { (cmd), (ioctl_trans_handler_t)(handler) }, + +/* pointer to compatible structure or no argument */ +#define COMPATIBLE_IOCTL(cmd) \ + { (cmd), do_ioctl32_pointer }, + +/* argument is an unsigned long integer, not a pointer */ +#define ULONG_IOCTL(cmd) \ + { (cmd), (ioctl_trans_handler_t)sys_ioctl }, -#ifdef DECLARES + +struct ioctl_trans ioctl_start[] = { +#include <linux/compat_ioctl.h> HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET @@ -3015,14 +2825,6 @@ COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD) #ifdef CONFIG_JBD_DEBUG HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl) #endif -HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSWIN32, do_set_window) -HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl) /* One SMB ioctl needs translations. */ #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) @@ -3104,6 +2906,10 @@ HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) +HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl) +HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl) +HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl) +HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl) #if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE) HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest) @@ -3120,6 +2926,6 @@ HANDLE_IOCTL(DMX_GET_EVENT, do_dmx_get_event) HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event) HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture) HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette) +}; -#undef DECLARES -#endif +int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e48b539243a1..b668ec61527e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -288,10 +288,10 @@ static struct dentry * configfs_lookup(struct inode *dir, /* * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are - * attributes and are removed by rmdir(). We recurse, taking i_sem + * attributes and are removed by rmdir(). We recurse, taking i_mutex * on all children that are candidates for default detach. If the * result is clean, then configfs_detach_group() will handle dropping - * i_sem. If there is an error, the caller will clean up the i_sem + * i_mutex. If there is an error, the caller will clean up the i_mutex * holders via configfs_detach_rollback(). */ static int configfs_detach_prep(struct dentry *dentry) @@ -309,8 +309,8 @@ static int configfs_detach_prep(struct dentry *dentry) if (sd->s_type & CONFIGFS_NOT_PINNED) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { - down(&sd->s_dentry->d_inode->i_sem); - /* Mark that we've taken i_sem */ + mutex_lock(&sd->s_dentry->d_inode->i_mutex); + /* Mark that we've taken i_mutex */ sd->s_type |= CONFIGFS_USET_DROPPING; ret = configfs_detach_prep(sd->s_dentry); @@ -327,7 +327,7 @@ out: } /* - * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is + * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is * set. */ static void configfs_detach_rollback(struct dentry *dentry) @@ -341,7 +341,7 @@ static void configfs_detach_rollback(struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DROPPING) { sd->s_type &= ~CONFIGFS_USET_DROPPING; - up(&sd->s_dentry->d_inode->i_sem); + mutex_unlock(&sd->s_dentry->d_inode->i_mutex); } } } @@ -424,11 +424,11 @@ static void detach_groups(struct config_group *group) /* * From rmdir/unregister, a configfs_detach_prep() pass - * has taken our i_sem for us. Drop it. + * has taken our i_mutex for us. Drop it. * From mkdir/register cleanup, there is no sem held. */ if (sd->s_type & CONFIGFS_USET_DROPPING) - up(&child->d_inode->i_sem); + mutex_unlock(&child->d_inode->i_mutex); d_delete(child); dput(child); @@ -493,11 +493,11 @@ static int populate_groups(struct config_group *group) /* FYI, we're faking mkdir here * I'm not sure we need this semaphore, as we're called * from our parent's mkdir. That holds our parent's - * i_sem, so afaik lookup cannot continue through our + * i_mutex, so afaik lookup cannot continue through our * parent to find us, let alone mess with our tree. - * That said, taking our i_sem is closer to mkdir + * That said, taking our i_mutex is closer to mkdir * emulation, and shouldn't hurt. */ - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); for (i = 0; group->default_groups[i]; i++) { new_group = group->default_groups[i]; @@ -507,7 +507,7 @@ static int populate_groups(struct config_group *group) break; } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); } if (ret) @@ -856,7 +856,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name) down_write(&configfs_rename_sem); parent = item->parent->dentry; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { @@ -872,7 +872,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name) error = -EEXIST; dput(new_dentry); } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); up_write(&configfs_rename_sem); return error; @@ -884,9 +884,9 @@ static int configfs_dir_open(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct configfs_dirent * parent_sd = dentry->d_fsdata; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); file->private_data = configfs_new_dirent(parent_sd, NULL); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return file->private_data ? 0 : -ENOMEM; @@ -897,9 +897,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct configfs_dirent * cursor = file->private_data; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); list_del_init(&cursor->s_sibling); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); release_configfs_dirent(cursor); @@ -975,7 +975,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) { struct dentry * dentry = file->f_dentry; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -983,7 +983,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) if (offset >= 0) break; default: - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -1007,7 +1007,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) list_add_tail(&cursor->s_sibling, p); } } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -1037,7 +1037,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) sd = configfs_sb->s_root->d_fsdata; link_group(to_config_group(sd->s_element), group); - down(&configfs_sb->s_root->d_inode->i_sem); + mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); name.name = group->cg_item.ci_name; name.len = strlen(name.name); @@ -1057,7 +1057,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) else d_delete(dentry); - up(&configfs_sb->s_root->d_inode->i_sem); + mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); if (dentry) { dput(dentry); @@ -1079,18 +1079,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) return; } - down(&configfs_sb->s_root->d_inode->i_sem); - down(&dentry->d_inode->i_sem); + mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); + mutex_lock(&dentry->d_inode->i_mutex); if (configfs_detach_prep(dentry)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); - up(&configfs_sb->s_root->d_inode->i_sem); + mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); dput(dentry); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index af1ffc9a15c0..c26cd61f13af 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -336,9 +336,9 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; int error = 0; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return error; } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 6b274c6d428f..6577c588de9d 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -122,7 +122,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd) /* * Unhashes the dentry corresponding to given configfs_dirent - * Called with parent inode's i_sem held. + * Called with parent inode's i_mutex held. */ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) { @@ -145,7 +145,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) struct configfs_dirent * sd; struct configfs_dirent * parent_sd = dir->d_fsdata; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) continue; @@ -156,7 +156,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) break; } } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); } diff --git a/fs/dcache.c b/fs/dcache.c index 17e439138681..134d6775183f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = { static void d_callback(struct rcu_head *head) { - struct dentry * dentry = container_of(head, struct dentry, d_rcu); + struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); if (dname_external(dentry)) kfree(dentry->d_name.name); @@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - call_rcu(&dentry->d_rcu, d_callback); + call_rcu(&dentry->d_u.d_rcu, d_callback); } /* @@ -193,7 +193,7 @@ kill_it: { list_del(&dentry->d_lru); dentry_stat.nr_unused--; } - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); @@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry) struct dentry * parent; __d_drop(dentry); - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ dentry_iput(dentry); parent = dentry->d_parent; @@ -518,7 +518,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; /* Have we found a mount point ? */ if (d_mountpoint(dentry)) @@ -532,7 +532,7 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -569,7 +569,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (!list_empty(&dentry->d_lru)) { @@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found); * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; #ifdef DCACHE_DEBUG printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", @@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_parent = dget(parent); dentry->d_sb = parent->d_sb; } else { - INIT_LIST_HEAD(&dentry->d_child); + INIT_LIST_HEAD(&dentry->d_u.d_child); } spin_lock(&dcache_lock); if (parent) - list_add(&dentry->d_child, &parent->d_subdirs); + list_add(&dentry->d_u.d_child, &parent->d_subdirs); dentry_stat.nr_dentry++; spin_unlock(&dcache_lock); @@ -808,10 +808,14 @@ void d_instantiate(struct dentry *entry, struct inode * inode) * * Fill in inode information in the entry. On success, it returns NULL. * If an unhashed alias of "entry" already exists, then we return the - * aliased dentry instead. + * aliased dentry instead and drop one reference to inode. * * Note that in order to avoid conflicts with rename() etc, the caller * had better be holding the parent directory semaphore. + * + * This also assumes that the inode count has been incremented + * (or otherwise set) by the caller to indicate that it is now + * in use by the dcache. */ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { @@ -838,6 +842,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) dget_locked(alias); spin_unlock(&dcache_lock); BUG_ON(!d_unhashed(alias)); + iput(inode); return alias; } list_add(&entry->d_alias, &inode->i_dentry); @@ -1310,8 +1315,8 @@ already_unhashed: /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - list_del(&dentry->d_child); - list_del(&target->d_child); + list_del(&dentry->d_u.d_child); + list_del(&target->d_u.d_child); /* Switch the names.. */ switch_names(dentry, target); @@ -1322,15 +1327,15 @@ already_unhashed: if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_child); + INIT_LIST_HEAD(&target->d_u.d_child); } else { do_switch(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_child, &target->d_parent->d_subdirs); + list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); @@ -1568,7 +1573,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (d_unhashed(dentry)||!dentry->d_inode) continue; @@ -1579,7 +1584,7 @@ resume: atomic_dec(&dentry->d_count); } if (this_parent != root) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; atomic_dec(&this_parent->d_count); this_parent = this_parent->d_parent; goto resume; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a86ac4aeaedb..d4f1a2cddd47 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -146,7 +146,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, } *dentry = NULL; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) @@ -155,7 +155,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, error = debugfs_create(parent->d_inode, *dentry, mode); } else error = PTR_ERR(dentry); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); return error; } @@ -273,7 +273,7 @@ void debugfs_remove(struct dentry *dentry) if (!parent || !parent->d_inode) return; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); if (debugfs_positive(dentry)) { if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) @@ -283,7 +283,7 @@ void debugfs_remove(struct dentry *dentry) dput(dentry); } } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove); diff --git a/fs/devfs/base.c b/fs/devfs/base.c index 1274422a5384..b621521e09d4 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -2162,27 +2162,27 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd) * * make sure that * d_instantiate always runs under lock - * we release i_sem lock before going to sleep + * we release i_mutex lock before going to sleep * * unfortunately sometimes d_revalidate is called with - * and sometimes without i_sem lock held. The following checks + * and sometimes without i_mutex lock held. The following checks * attempt to deduce when we need to add (and drop resp.) lock * here. This relies on current (2.6.2) calling coventions: * - * lookup_hash is always run under i_sem and is passing NULL + * lookup_hash is always run under i_mutex and is passing NULL * as nd * - * open(...,O_CREATE,...) calls _lookup_hash under i_sem + * open(...,O_CREATE,...) calls _lookup_hash under i_mutex * and sets flags to LOOKUP_OPEN|LOOKUP_CREATE * * all other invocations of ->d_revalidate seem to happen - * outside of i_sem + * outside of i_mutex */ need_lock = nd && (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT)); if (need_lock) - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); if (is_devfsd_or_child(fs_info)) { devfs_handle_t de = lookup_info->de; @@ -2221,9 +2221,9 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd) add_wait_queue(&lookup_info->wait_queue, &wait); read_unlock(&parent->u.dir.lock); /* at this point it is always (hopefully) locked */ - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); schedule(); - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); /* * This does not need nor should remove wait from wait_queue. * Wait queue head is never reused - nothing is ever added to it @@ -2238,7 +2238,7 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd) out: if (need_lock) - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); return 1; } /* End Function devfs_d_revalidate_wait */ @@ -2284,9 +2284,9 @@ static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry, /* Unlock directory semaphore, which will release any waiters. They will get the hashed dentry, and may be forced to wait for revalidation */ - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); wait_for_devfsd_finished(fs_info); /* If I'm not devfsd, must wait */ - down(&dir->i_sem); /* Grab it again because them's the rules */ + mutex_lock(&dir->i_mutex); /* Grab it again because them's the rules */ de = lookup_info.de; /* If someone else has been so kind as to make the inode, we go home early */ diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f2be44d4491f..bfb8a230bac9 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -130,7 +130,7 @@ static struct dentry *get_node(int num) { char s[12]; struct dentry *root = devpts_root; - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); return lookup_one_len(s, root, sprintf(s, "%d", num)); } @@ -161,7 +161,7 @@ int devpts_pty_new(struct tty_struct *tty) if (!IS_ERR(dentry) && !dentry->d_inode) d_instantiate(dentry, inode); - up(&devpts_root->d_inode->i_sem); + mutex_unlock(&devpts_root->d_inode->i_mutex); return 0; } @@ -178,7 +178,7 @@ struct tty_struct *devpts_get_tty(int number) dput(dentry); } - up(&devpts_root->d_inode->i_sem); + mutex_unlock(&devpts_root->d_inode->i_mutex); return tty; } @@ -196,7 +196,7 @@ void devpts_pty_kill(int number) } dput(dentry); } - up(&devpts_root->d_inode->i_sem); + mutex_unlock(&devpts_root->d_inode->i_mutex); } static int __init init_devpts_fs(void) diff --git a/fs/direct-io.c b/fs/direct-io.c index 3931e7f1e6bf..30dbbd1df511 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -56,7 +56,7 @@ * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems. * This determines whether we need to do the fancy locking which prevents * direct-IO from being able to read uninitialised disk blocks. If its zero - * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_sem is + * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is * not held for the entire direct write (taken briefly, initially, during a * direct read though, but its never held for the duration of a direct-IO). */ @@ -930,7 +930,7 @@ out: } /* - * Releases both i_sem and i_alloc_sem + * Releases both i_mutex and i_alloc_sem */ static ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, @@ -1062,11 +1062,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, /* * All block lookups have been performed. For READ requests - * we can let i_sem go now that its achieved its purpose + * we can let i_mutex go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. */ if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) - up(&dio->inode->i_sem); + mutex_unlock(&dio->inode->i_mutex); /* * OK, all BIOs are submitted, so we can decrement bio_count to truly @@ -1145,18 +1145,18 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, * The locking rules are governed by the dio_lock_type parameter. * * DIO_NO_LOCKING (no locking, for raw block device access) - * For writes, i_sem is not held on entry; it is never taken. + * For writes, i_mutex is not held on entry; it is never taken. * * DIO_LOCKING (simple locking for regular files) - * For writes we are called under i_sem and return with i_sem held, even though + * For writes we are called under i_mutex and return with i_mutex held, even though * it is internally dropped. - * For reads, i_sem is not held on entry, but it is taken and dropped before + * For reads, i_mutex is not held on entry, but it is taken and dropped before * returning. * * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of * uninitialised data, allowing parallel direct readers and writers) - * For writes we are called without i_sem, return without it, never touch it. - * For reads, i_sem is held on entry and will be released before returning. + * For writes we are called without i_mutex, return without it, never touch it. + * For reads, i_mutex is held on entry and will be released before returning. * * Additional i_alloc_sem locking requirements described inline below. */ @@ -1214,11 +1214,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * For block device access DIO_NO_LOCKING is used, * neither readers nor writers do any locking at all * For regular files using DIO_LOCKING, - * readers need to grab i_sem and i_alloc_sem - * writers need to grab i_alloc_sem only (i_sem is already held) + * readers need to grab i_mutex and i_alloc_sem + * writers need to grab i_alloc_sem only (i_mutex is already held) * For regular files using DIO_OWN_LOCKING, * neither readers nor writers take any locks here - * (i_sem is already held and release for writers here) + * (i_mutex is already held and release for writers here) */ dio->lock_type = dio_lock_type; if (dio_lock_type != DIO_NO_LOCKING) { @@ -1228,7 +1228,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, mapping = iocb->ki_filp->f_mapping; if (dio_lock_type != DIO_OWN_LOCKING) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); reader_with_isem = 1; } @@ -1240,7 +1240,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } if (dio_lock_type == DIO_OWN_LOCKING) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reader_with_isem = 0; } } @@ -1266,7 +1266,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, out: if (reader_with_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (rw & WRITE) current->flags &= ~PF_SYNCWRITE; return retval; diff --git a/fs/dquot.c b/fs/dquot.c index 2a62b3dc20ec..cb6d5bfbdfd5 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -100,7 +100,7 @@ * operation is just reading pointers from inode (or not using them at all) the * read lock is enough. If pointers are altered function must hold write lock * (these locking rules also apply for S_NOQUOTA flag in the inode - note that - * for altering the flag i_sem is also needed). If operation is holding + * for altering the flag i_mutex is also needed). If operation is holding * reference to dquot in other way (e.g. quotactl ops) it must be guarded by * dqonoff_sem. * This locking assures that: @@ -117,9 +117,9 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > + * i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > * > dquot->dq_lock > dqio_sem - * i_sem on quota files is special (it's below dqio_sem) + * i_mutex on quota files is special (it's below dqio_sem) */ static DEFINE_SPINLOCK(dq_list_lock); @@ -1369,11 +1369,11 @@ int vfs_quota_off(struct super_block *sb, int type) /* If quota was reenabled in the meantime, we have * nothing to do */ if (!sb_has_quota_enabled(sb, cnt)) { - down(&toputinode[cnt]->i_sem); + mutex_lock(&toputinode[cnt]->i_mutex); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); truncate_inode_pages(&toputinode[cnt]->i_data, 0); - up(&toputinode[cnt]->i_sem); + mutex_unlock(&toputinode[cnt]->i_mutex); mark_inode_dirty(toputinode[cnt]); iput(toputinode[cnt]); } @@ -1417,7 +1417,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) write_inode_now(inode, 1); /* And now flush the block cache so that kernel sees the changes */ invalidate_bdev(sb->s_bdev, 0); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); down(&dqopt->dqonoff_sem); if (sb_has_quota_enabled(sb, type)) { error = -EBUSY; @@ -1449,7 +1449,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) goto out_file_init; } up(&dqopt->dqio_sem); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); set_enable_flags(dqopt, type); add_dquot_ref(sb, type); @@ -1470,7 +1470,7 @@ out_lock: inode->i_flags |= oldflags; up_write(&dqopt->dqptr_sem); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out_fmt: put_quota_format(fmt); diff --git a/fs/drop_caches.c b/fs/drop_caches.c new file mode 100644 index 000000000000..4e4762389bdc --- /dev/null +++ b/fs/drop_caches.c @@ -0,0 +1,68 @@ +/* + * Implement the manual drop-all-pagecache function + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/writeback.h> +#include <linux/sysctl.h> +#include <linux/gfp.h> + +/* A global variable is a bit ugly, but it keeps the code simple */ +int sysctl_drop_caches; + +static void drop_pagecache_sb(struct super_block *sb) +{ + struct inode *inode; + + spin_lock(&inode_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (inode->i_state & (I_FREEING|I_WILL_FREE)) + continue; + invalidate_inode_pages(inode->i_mapping); + } + spin_unlock(&inode_lock); +} + +void drop_pagecache(void) +{ + struct super_block *sb; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) + drop_pagecache_sb(sb); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); +} + +void drop_slab(void) +{ + int nr_objects; + + do { + nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); + } while (nr_objects > 10); +} + +int drop_caches_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (write) { + if (sysctl_drop_caches & 1) + drop_pagecache(); + if (sysctl_drop_caches & 2) + drop_slab(); + } + return 0; +} diff --git a/fs/exec.c b/fs/exec.c index 22533cce0611..b5bcf1aae0ab 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -324,7 +324,7 @@ void install_arg_page(struct vm_area_struct *vma, lru_cache_add_active(page); set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); - page_add_anon_rmap(page, vma, address); + page_add_new_anon_rmap(page, vma, address); pte_unmap_unlock(pte, ptl); /* no need for flush_tlb */ @@ -632,10 +632,10 @@ static inline int de_thread(struct task_struct *tsk) * synchronize with any firing (by calling del_timer_sync) * before we can safely let the old group leader die. */ - sig->real_timer.data = (unsigned long)current; + sig->real_timer.data = current; spin_unlock_irq(lock); - if (del_timer_sync(&sig->real_timer)) - add_timer(&sig->real_timer); + if (hrtimer_cancel(&sig->real_timer)) + hrtimer_restart(&sig->real_timer); spin_lock_irq(lock); } while (atomic_read(&sig->count) > count) { @@ -760,7 +760,7 @@ no_thread_group: spin_lock(&oldsighand->siglock); spin_lock(&newsighand->siglock); - current->sighand = newsighand; + rcu_assign_pointer(current->sighand, newsighand); recalc_sigpending(); spin_unlock(&newsighand->siglock); @@ -768,7 +768,7 @@ no_thread_group: write_unlock_irq(&tasklist_lock); if (atomic_dec_and_test(&oldsighand->count)) - kmem_cache_free(sighand_cachep, oldsighand); + sighand_free(oldsighand); } BUG_ON(!thread_group_leader(current)); @@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { current->signal->flags = SIGNAL_GROUP_EXIT; current->signal->group_exit_code = exit_code; + current->signal->group_stop_count = 0; retval = 0; } spin_unlock_irq(¤t->sighand->siglock); @@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * Clear any false indication of pending signals that might * be seen by the filesystem code called to write the core file. */ - current->signal->group_stop_count = 0; clear_thread_flag(TIF_SIGPENDING); if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) @@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) goto close_fail; if (!file->f_op->write) goto close_fail; - if (do_truncate(file->f_dentry, 0, file) != 0) + if (do_truncate(file->f_dentry, 0, 0, file) != 0) goto close_fail; retval = binfmt->core_dump(signr, regs, file); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c49d6254379a..5bfe40085fbc 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -177,9 +177,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, struct dentry *ppd; struct dentry *npd; - down(&pd->d_inode->i_sem); + mutex_lock(&pd->d_inode->i_mutex); ppd = CALL(nops,get_parent)(pd); - up(&pd->d_inode->i_sem); + mutex_unlock(&pd->d_inode->i_mutex); if (IS_ERR(ppd)) { err = PTR_ERR(ppd); @@ -201,9 +201,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, break; } dprintk("find_exported_dentry: found name: %s\n", nbuf); - down(&ppd->d_inode->i_sem); + mutex_lock(&ppd->d_inode->i_mutex); npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); - up(&ppd->d_inode->i_sem); + mutex_unlock(&ppd->d_inode->i_mutex); if (IS_ERR(npd)) { err = PTR_ERR(npd); dprintk("find_exported_dentry: lookup failed: %d\n", err); @@ -242,9 +242,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, struct dentry *nresult; err = CALL(nops,get_name)(target_dir, nbuf, result); if (!err) { - down(&target_dir->d_inode->i_sem); + mutex_lock(&target_dir->d_inode->i_mutex); nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); - up(&target_dir->d_inode->i_sem); + mutex_unlock(&target_dir->d_inode->i_mutex); if (!IS_ERR(nresult)) { if (nresult->d_inode) { dput(result); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 6af2f4130290..239133d01d91 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -149,7 +149,7 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, } /* - * inode->i_sem: don't care + * inode->i_mutex: don't care */ static struct posix_acl * ext2_get_acl(struct inode *inode, int type) @@ -211,7 +211,7 @@ ext2_get_acl(struct inode *inode, int type) } /* - * inode->i_sem: down + * inode->i_mutex: down */ static int ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) @@ -301,8 +301,8 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd) /* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * - * dir->i_sem: down - * inode->i_sem: up (access to inode is still exclusive) + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) */ int ext2_init_acl(struct inode *inode, struct inode *dir) @@ -361,7 +361,7 @@ cleanup: * for directories) are added. There are no more bits available in the * file mode. * - * inode->i_sem: down + * inode->i_mutex: down */ int ext2_acl_chmod(struct inode *inode) diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c index 20145b74623f..e9983a0dd396 100644 --- a/fs/ext2/bitmap.c +++ b/fs/ext2/bitmap.c @@ -7,8 +7,12 @@ * Universite Pierre et Marie Curie (Paris VI) */ +#ifdef EXT2FS_DEBUG + #include <linux/buffer_head.h> +#include "ext2.h" + static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) @@ -23,3 +27,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) nibblemap[(map->b_data[i] >> 4) & 0xf]; return (sum); } + +#endif /* EXT2FS_DEBUG */ + diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e977f8566d14..00de0a7312a2 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -53,7 +53,7 @@ struct ext2_inode_info { #ifdef CONFIG_EXT2_FS_XATTR /* * Extended attributes can be read independently of the main file - * data. Taking i_sem even when reading would cause contention + * data. Taking i_mutex even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs. diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 522fa70dd8ea..8d6819846fc9 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1152,7 +1152,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head tmp_bh; struct buffer_head *bh; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -1189,7 +1189,7 @@ out: inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0099462d4271..a2ca3107d475 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -325,7 +325,7 @@ cleanup: /* * Inode operation listxattr() * - * dentry->d_inode->i_sem: don't care + * dentry->d_inode->i_mutex: don't care */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -389,10 +389,6 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", name_index, name, value, (long)value_len); - if (IS_RDONLY(inode)) - return -EROFS; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; if (value == NULL) value_len = 0; if (name == NULL) diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 52b30ee6a25f..2c072bfea23b 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -38,8 +38,6 @@ ext2_xattr_trusted_get(struct inode *inode, const char *name, { if (strcmp(name, "") == 0) return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, buffer, size); } @@ -50,8 +48,6 @@ ext2_xattr_trusted_set(struct inode *inode, const char *name, { if (strcmp(name, "") == 0) return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 0c03ea131a94..f383e7c3a7b5 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -35,16 +35,10 @@ static int ext2_xattr_user_get(struct inode *inode, const char *name, void *buffer, size_t size) { - int error; - if (strcmp(name, "") == 0) return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - error = permission(inode, MAY_READ, NULL); - if (error) - return error; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); } @@ -52,18 +46,10 @@ static int ext2_xattr_user_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - int error; - if (strcmp(name, "") == 0) return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - if ( !S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; - error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, value, size, flags); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 3ac38266fc9e..9ed132c96034 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -152,7 +152,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, /* * Inode operation get_posix_acl(). * - * inode->i_sem: don't care + * inode->i_mutex: don't care */ static struct posix_acl * ext3_get_acl(struct inode *inode, int type) @@ -216,7 +216,7 @@ ext3_get_acl(struct inode *inode, int type) /* * Set the access or default ACL of an inode. * - * inode->i_sem: down unless called from ext3_new_inode + * inode->i_mutex: down unless called from ext3_new_inode */ static int ext3_set_acl(handle_t *handle, struct inode *inode, int type, @@ -306,8 +306,8 @@ ext3_permission(struct inode *inode, int mask, struct nameidata *nd) /* * Initialize the ACLs of a new inode. Called from ext3_new_inode. * - * dir->i_sem: down - * inode->i_sem: up (access to inode is still exclusive) + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) */ int ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) @@ -368,7 +368,7 @@ cleanup: * for directories) are added. There are no more bits available in the * file mode. * - * inode->i_sem: down + * inode->i_mutex: down */ int ext3_acl_chmod(struct inode *inode) diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index ae1148c24c53..c6393fb4c35a 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -20,8 +20,6 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> -#include "bitmap.h" - /* * balloc.c contains the blocks allocation and deallocation routines */ diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c index 5b4ba3e246e6..cb16b4c5d5df 100644 --- a/fs/ext3/bitmap.c +++ b/fs/ext3/bitmap.c @@ -7,8 +7,11 @@ * Universite Pierre et Marie Curie (Paris VI) */ +#ifdef EXT3FS_DEBUG + #include <linux/buffer_head.h> -#include "bitmap.h" + +#include "ext3_fs.h" static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; @@ -24,3 +27,6 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) nibblemap[(map->b_data[i] >> 4) & 0xf]; return (sum); } + +#endif /* EXT3FS_DEBUG */ + diff --git a/fs/ext3/bitmap.h b/fs/ext3/bitmap.h deleted file mode 100644 index 6ee503a6bb4e..000000000000 --- a/fs/ext3/bitmap.h +++ /dev/null @@ -1,8 +0,0 @@ -/* linux/fs/ext3/bitmap.c - * - * Copyright (C) 2005 Simtec Electronics - * Ben Dooks <ben@simtec.co.uk> - * -*/ - -extern unsigned long ext3_count_free (struct buffer_head *, unsigned int ); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 9e4a24376210..dc826464f313 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -26,7 +26,6 @@ #include <asm/byteorder.h> -#include "bitmap.h" #include "xattr.h" #include "acl.h" @@ -651,7 +650,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext3_warning(sb, __FUNCTION__, - "bad orphan ino %lu! e2fsck was run?\n", ino); + "bad orphan ino %lu! e2fsck was run?", ino); goto out; } @@ -660,7 +659,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) { ext3_warning(sb, __FUNCTION__, - "inode bitmap error for orphan %lu\n", ino); + "inode bitmap error for orphan %lu", ino); goto out; } @@ -672,7 +671,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) !(inode = iget(sb, ino)) || is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) { ext3_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?\n", ino); + "bad orphan inode %lu! e2fsck was run?", ino); printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, ext3_test_bit(bit, bitmap_bh->b_data)); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index b3c690a3b54a..af193a304ee5 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1476,7 +1476,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { ext3_warning(sb, __FUNCTION__, - "Directory index full!\n"); + "Directory index full!"); err = -ENOSPC; goto cleanup; } diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 6104ad310507..1041dab6de2f 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb, unsigned start = le32_to_cpu(es->s_blocks_count); unsigned end = start + input->blocks_count; unsigned group = input->group; - unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group; + unsigned itend = input->inode_table + sbi->s_itb_per_group; unsigned overhead = ext3_bg_has_super(sb, group) ? (1 + ext3_bg_num_gdb(sb, group) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; @@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ ext3_warning(sb, __FUNCTION__, - "reserved GDT %ld missing grp %d (%ld)\n", + "reserved GDT %ld missing grp %d (%ld)", blk, grp, grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); return -EINVAL; @@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (EXT3_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { ext3_warning(sb, __FUNCTION__, - "won't resize using backup superblock at %llu\n", + "won't resize using backup superblock at %llu", (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); return -EPERM; } @@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__u32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { ext3_warning(sb, __FUNCTION__, - "new group %u GDT block %lu not reserved\n", + "new group %u GDT block %lu not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { ext3_warning(sb, __FUNCTION__, - "reserved block %lu not at offset %ld\n", + "reserved block %lu not at offset %ld", blk, (long)(data - (__u32 *)dind->b_data)); err = -EINVAL; goto exit_bh; @@ -683,7 +683,7 @@ exit_err: if (err) { ext3_warning(sb, __FUNCTION__, "can't update backup for group %d (err %d), " - "forcing fsck on next reboot\n", group, err); + "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT3_VALID_FS; sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); mark_buffer_dirty(sbi->s_sbh); @@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { ext3_warning(sb, __FUNCTION__, - "Can't resize non-sparse filesystem further\n"); + "Can't resize non-sparse filesystem further"); return -EPERM; } @@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_RESIZE_INODE)){ ext3_warning(sb, __FUNCTION__, - "No reserved GDT blocks, can't resize\n"); + "No reserved GDT blocks, can't resize"); return -EPERM; } inode = iget(sb, EXT3_RESIZE_INO); if (!inode || is_bad_inode(inode)) { ext3_warning(sb, __FUNCTION__, - "Error opening resize inode\n"); + "Error opening resize inode"); iput(inode); return -ENOENT; } @@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) } lock_super(sb); - if (input->group != EXT3_SB(sb)->s_groups_count) { + if (input->group != sbi->s_groups_count) { ext3_warning(sb, __FUNCTION__, - "multiple resizers run on filesystem!\n"); + "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; } @@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. * - * The key field here is EXT3_SB(sb)->s_groups_count: as long as + * The key field here is sbi->s_groups_count: as long as * that retains its old value, nobody is going to access the new * group. * @@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) smp_wmb(); /* Update the global fs size fields */ - EXT3_SB(sb)->s_groups_count++; + sbi->s_groups_count++; ext3_journal_dirty_metadata(handle, primary); @@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) percpu_counter_mod(&sbi->s_freeinodes_counter, EXT3_INODES_PER_GROUP(sb)); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + ext3_journal_dirty_metadata(handle, sbi->s_sbh); sb->s_dirt = 1; exit_journal: @@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, if (last == 0) { ext3_warning(sb, __FUNCTION__, - "need to use ext2online to resize further\n"); + "need to use ext2online to resize further"); return -EPERM; } @@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, lock_super(sb); if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { ext3_warning(sb, __FUNCTION__, - "multiple resizers run on filesystem!\n"); + "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_put; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4e6730622d90..56bf76586019 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -43,7 +43,8 @@ #include "acl.h" #include "namei.h" -static int ext3_load_journal(struct super_block *, struct ext3_super_block *); +static int ext3_load_journal(struct super_block *, struct ext3_super_block *, + unsigned long journal_devnum); static int ext3_create_journal(struct super_block *, struct ext3_super_block *, int); static void ext3_commit_super (struct super_block * sb, @@ -628,7 +629,7 @@ enum { Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, - Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, @@ -666,6 +667,7 @@ static match_table_t tokens = { {Opt_commit, "commit=%u"}, {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, + {Opt_journal_dev, "journal_dev=%u"}, {Opt_abort, "abort"}, {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, @@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data) return sb_block; } -static int parse_options (char * options, struct super_block *sb, - unsigned long * inum, unsigned long *n_blocks_count, int is_remount) +static int parse_options (char *options, struct super_block *sb, + unsigned long *inum, unsigned long *journal_devnum, + unsigned long *n_blocks_count, int is_remount) { struct ext3_sb_info *sbi = EXT3_SB(sb); char * p; @@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb, return 0; *inum = option; break; + case Opt_journal_dev: + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "journal on remount\n"); + return 0; + } + if (match_int(&args[0], &option)) + return 0; + *journal_devnum = option; + break; case Opt_noload: set_opt (sbi->s_mount_opt, NOLOAD); break; @@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long journal_inum = 0; + unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; int blocksize; @@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, RESERVATION); - if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) + if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, + NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) */ if (!test_opt(sb, NOLOAD) && EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { - if (ext3_load_journal(sb, es)) + if (ext3_load_journal(sb, es, journal_devnum)) goto failed_mount2; } else if (journal_inum) { if (ext3_create_journal(sb, es, journal_inum)) @@ -1902,15 +1917,24 @@ out_bdev: return NULL; } -static int ext3_load_journal(struct super_block * sb, - struct ext3_super_block * es) +static int ext3_load_journal(struct super_block *sb, + struct ext3_super_block *es, + unsigned long journal_devnum) { journal_t *journal; int journal_inum = le32_to_cpu(es->s_journal_inum); - dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); + dev_t journal_dev; int err = 0; int really_read_only; + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + printk(KERN_INFO "EXT3-fs: external journal device major/minor " + "numbers have changed\n"); + journal_dev = new_decode_dev(journal_devnum); + } else + journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); + really_read_only = bdev_read_only(sb->s_bdev); /* @@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb, EXT3_SB(sb)->s_journal = journal; ext3_clear_journal_err(sb, es); + + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + es->s_journal_dev = cpu_to_le32(journal_devnum); + sb->s_dirt = 1; + + /* Make sure we flush the recovery flag to disk. */ + ext3_commit_super(sb, es, 1); + } + return 0; } @@ -2116,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb) static void ext3_write_super (struct super_block * sb) { - if (down_trylock(&sb->s_lock) == 0) + if (mutex_trylock(&sb->s_lock) != 0) BUG(); sb->s_dirt = 0; } @@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { + if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { err = -EINVAL; goto restore_opts; } @@ -2567,7 +2601,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -2610,7 +2644,7 @@ out: inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext3_mark_inode_dirty(handle, inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 430de9f63be3..e8d60bf6b7df 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -140,7 +140,7 @@ ext3_xattr_handler(int name_index) /* * Inode operation listxattr() * - * dentry->d_inode->i_sem: don't care + * dentry->d_inode->i_mutex: don't care */ ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -946,10 +946,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, }; int error; - if (IS_RDONLY(inode)) - return -EROFS; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; if (!name) return -EINVAL; if (strlen(name) > 255) diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index f68bfd1cf519..7c693c94f14d 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -39,8 +39,6 @@ ext3_xattr_trusted_get(struct inode *inode, const char *name, { if (strcmp(name, "") == 0) return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, buffer, size); } @@ -51,8 +49,6 @@ ext3_xattr_trusted_set(struct inode *inode, const char *name, { if (strcmp(name, "") == 0) return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index e907cae7a07c..a85a0a17c4fd 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -37,16 +37,10 @@ static int ext3_xattr_user_get(struct inode *inode, const char *name, void *buffer, size_t size) { - int error; - if (strcmp(name, "") == 0) return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - error = permission(inode, MAY_READ, NULL); - if (error) - return error; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); } @@ -54,19 +48,10 @@ static int ext3_xattr_user_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - int error; - if (strcmp(name, "") == 0) return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - if ( !S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; - error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, value, size, flags); } diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 77c24fcf712a..1acc941245fb 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return dclus; } -int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) +int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) int cluster, offset; *phys = 0; + *mapped_blocks = 0; if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) { - if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) + if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) { *phys = sector + sbi->dir_start; + *mapped_blocks = 1; + } return 0; } last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) @@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) cluster = fat_bmap_cluster(inode, cluster); if (cluster < 0) return cluster; - else if (cluster) + else if (cluster) { *phys = fat_clus_to_blknr(sbi, cluster) + offset; + *mapped_blocks = sbi->sec_per_clus - offset; + if (*mapped_blocks > last_block - sector) + *mapped_blocks = last_block - sector; + } return 0; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index ba824964b9bb..db0de5c621c7 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock, if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) return; - bh = sb_getblk(sb, phys); - if (bh && !buffer_uptodate(bh)) { + bh = sb_find_get_block(sb, phys); + if (bh == NULL || !buffer_uptodate(bh)) { for (sec = 0; sec < sbi->sec_per_clus; sec++) sb_breadahead(sb, phys + sec); } @@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos, { struct super_block *sb = dir->i_sb; sector_t phys, iblock; - int offset; - int err; + unsigned long mapped_blocks; + int err, offset; next: if (*bh) @@ -77,7 +77,7 @@ next: *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks); if (err || !phys) return -1; /* beyond EOF or error */ @@ -418,7 +418,7 @@ EODir: return err; } -EXPORT_SYMBOL(fat_search_long); +EXPORT_SYMBOL_GPL(fat_search_long); struct fat_ioctl_filldir_callback { struct dirent __user *dirent; @@ -729,13 +729,13 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, buf.dirent = d1; buf.result = 0; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = -ENOENT; if (!IS_DEADDIR(inode)) { ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir, short_only, both); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret >= 0) ret = buf.result; return ret; @@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, return -ENOENT; } -EXPORT_SYMBOL(fat_get_dotdot_entry); +EXPORT_SYMBOL_GPL(fat_get_dotdot_entry); /* See if directory is empty */ int fat_dir_empty(struct inode *dir) @@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir) return result; } -EXPORT_SYMBOL(fat_dir_empty); +EXPORT_SYMBOL_GPL(fat_dir_empty); /* * fat_subdirs counts the number of sub-directories of dir. It can be run @@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name, return -ENOENT; } -EXPORT_SYMBOL(fat_scan); +EXPORT_SYMBOL_GPL(fat_scan); static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) { @@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) return 0; } -EXPORT_SYMBOL(fat_remove_entries); +EXPORT_SYMBOL_GPL(fat_remove_entries); static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, struct buffer_head **bhs, int nr_bhs) @@ -1048,7 +1048,7 @@ error: return err; } -EXPORT_SYMBOL(fat_alloc_new_dir); +EXPORT_SYMBOL_GPL(fat_alloc_new_dir); static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, int *nr_cluster, struct msdos_dir_entry **de, @@ -1264,4 +1264,4 @@ error_remove: return err; } -EXPORT_SYMBOL(fat_add_entries); +EXPORT_SYMBOL_GPL(fat_add_entries); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 4164cd54c4d1..a1a9e0451217 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) sbi->prev_free = entry; if (sbi->free_clusters != -1) sbi->free_clusters--; + sb->s_dirt = 1; cluster[idx_clus] = entry; idx_clus++; @@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) /* Couldn't allocate the free entries */ sbi->free_clusters = 0; + sb->s_dirt = 1; err = -ENOSPC; out: @@ -509,7 +511,6 @@ out: } for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); - fat_clusters_flush(sb); if (err && idx_clus) fat_free_clusters(inode, cluster[0]); @@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster) } ops->ent_put(&fatent, FAT_ENT_FREE); - if (sbi->free_clusters != -1) + if (sbi->free_clusters != -1) { sbi->free_clusters++; + sb->s_dirt = 1; + } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { if (sb->s_flags & MS_SYNCHRONOUS) { @@ -578,7 +581,7 @@ error: return err; } -EXPORT_SYMBOL(fat_free_clusters); +EXPORT_SYMBOL_GPL(fat_free_clusters); int fat_count_free_clusters(struct super_block *sb) { @@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb) } while (fat_ent_next(sbi, &fatent)); } sbi->free_clusters = free; + sb->s_dirt = 1; fatent_brelse(&fatent); out: unlock_fat(sbi); diff --git a/fs/fat/file.c b/fs/fat/file.c index 7134403d5be2..d30876cf35f5 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -11,6 +11,7 @@ #include <linux/msdos_fs.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -40,7 +41,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, if (err) return err; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (IS_RDONLY(inode)) { err = -EROFS; @@ -102,7 +103,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; mark_inode_dirty(inode); up: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return err; } default: @@ -124,6 +125,24 @@ struct file_operations fat_file_operations = { .sendfile = generic_file_sendfile, }; +static int fat_cont_expand(struct inode *inode, loff_t size) +{ + struct address_space *mapping = inode->i_mapping; + loff_t start = inode->i_size, count = size - inode->i_size; + int err; + + err = generic_cont_expand_simple(inode, size); + if (err) + goto out; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + if (IS_SYNC(inode)) + err = sync_page_range_nolock(inode, mapping, start, count); +out: + return err; +} + int fat_notify_change(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -132,11 +151,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) lock_kernel(); - /* FAT cannot truncate to a longer file */ + /* + * Expand the file. Since inode_setattr() updates ->i_size + * before calling the ->truncate(), but FAT needs to fill the + * hole before it. + */ if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size > inode->i_size) { - error = -EPERM; - goto out; + error = fat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + goto out; + attr->ia_valid &= ~ATTR_SIZE; } } @@ -173,7 +198,7 @@ out: return error; } -EXPORT_SYMBOL(fat_notify_change); +EXPORT_SYMBOL_GPL(fat_notify_change); /* Free all clusters after the skip'th cluster. */ static int fat_free(struct inode *inode, int skip) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a0f9b9fe1307..e7f4aa7fc686 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -18,10 +18,12 @@ #include <linux/seq_file.h> #include <linux/msdos_fs.h> #include <linux/pagemap.h> +#include <linux/mpage.h> #include <linux/buffer_head.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/parser.h> +#include <linux/uio.h> #include <asm/unaligned.h> #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode) return err; } -static int fat_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int __fat_get_blocks(struct inode *inode, sector_t iblock, + unsigned long *max_blocks, + struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); sector_t phys; - int err; + unsigned long mapped_blocks; + int err, offset; - err = fat_bmap(inode, iblock, &phys); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks); if (err) return err; if (phys) { map_bh(bh_result, sb, phys); + *max_blocks = min(mapped_blocks, *max_blocks); return 0; } if (!create) return 0; + if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); return -EIO; } - if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) { + + offset = (unsigned long)iblock & (sbi->sec_per_clus - 1); + if (!offset) { + /* TODO: multiple cluster allocation would be desirable. */ err = fat_add_cluster(inode); if (err) return err; } - MSDOS_I(inode)->mmu_private += sb->s_blocksize; - err = fat_bmap(inode, iblock, &phys); + /* available blocks on this cluster */ + mapped_blocks = sbi->sec_per_clus - offset; + + *max_blocks = min(mapped_blocks, *max_blocks); + MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; + + err = fat_bmap(inode, iblock, &phys, &mapped_blocks); if (err) return err; - if (!phys) - BUG(); + BUG_ON(!phys); + BUG_ON(*max_blocks != mapped_blocks); set_buffer_new(bh_result); map_bh(bh_result, sb, phys); return 0; } +static int fat_get_blocks(struct inode *inode, sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + struct super_block *sb = inode->i_sb; + int err; + + err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); + if (err) + return err; + bh_result->b_size = max_blocks << sb->s_blocksize_bits; + return 0; +} + +static int fat_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned long max_blocks = 1; + return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); +} + static int fat_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, fat_get_block, wbc); } +static int fat_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, fat_get_block); +} + static int fat_readpage(struct file *file, struct page *page) { - return block_read_full_page(page, fat_get_block); + return mpage_readpage(page, fat_get_block); +} + +static int fat_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, fat_get_block); } static int fat_prepare_write(struct file *file, struct page *page, @@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page, return err; } +static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) { + /* + * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * so we need to update the ->mmu_private to block boundary. + * + * But we must fill the remaining area or hole by nul for + * updating ->mmu_private. + */ + loff_t size = offset + iov_length(iov, nr_segs); + if (MSDOS_I(inode)->mmu_private < size) + return -EINVAL; + } + + /* + * FAT need to use the DIO_LOCKING for avoiding the race + * condition of fat_get_block() and ->truncate(). + */ + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, fat_get_blocks, NULL); +} + static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, fat_get_block); @@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block) static struct address_space_operations fat_aops = { .readpage = fat_readpage, + .readpages = fat_readpages, .writepage = fat_writepage, + .writepages = fat_writepages, .sync_page = block_sync_page, .prepare_write = fat_prepare_write, .commit_write = fat_commit_write, + .direct_IO = fat_direct_IO, .bmap = _fat_bmap }; @@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos) spin_unlock(&sbi->inode_hash_lock); } -EXPORT_SYMBOL(fat_attach); +EXPORT_SYMBOL_GPL(fat_attach); void fat_detach(struct inode *inode) { @@ -193,7 +272,7 @@ void fat_detach(struct inode *inode) spin_unlock(&sbi->inode_hash_lock); } -EXPORT_SYMBOL(fat_detach); +EXPORT_SYMBOL_GPL(fat_detach); struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { @@ -347,7 +426,7 @@ out: return inode; } -EXPORT_SYMBOL(fat_build_inode); +EXPORT_SYMBOL_GPL(fat_build_inode); static void fat_delete_inode(struct inode *inode) { @@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode) unlock_kernel(); } -static void fat_put_super(struct super_block *sb) +static void fat_write_super(struct super_block *sb) { - struct msdos_sb_info *sbi = MSDOS_SB(sb); + sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) fat_clusters_flush(sb); +} + +static void fat_put_super(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); if (sbi->nls_disk) { unload_nls(sbi->nls_disk); @@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode) return fat_write_inode(inode, 1); } -EXPORT_SYMBOL(fat_sync_inode); +EXPORT_SYMBOL_GPL(fat_sync_inode); static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); static struct super_operations fat_sops = { @@ -546,6 +630,7 @@ static struct super_operations fat_sops = { .write_inode = fat_write_inode, .delete_inode = fat_delete_inode, .put_super = fat_put_super, + .write_super = fat_write_super, .statfs = fat_statfs, .clear_inode = fat_clear_inode, .remount_fs = fat_remount, @@ -1347,7 +1432,7 @@ out_fail: return error; } -EXPORT_SYMBOL(fat_fill_super); +EXPORT_SYMBOL_GPL(fat_fill_super); int __init fat_cache_init(void); void fat_cache_destroy(void); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 2a0df2122f5d..32fb0a3f1da4 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...) } } -EXPORT_SYMBOL(fat_fs_panic); +EXPORT_SYMBOL_GPL(fat_fs_panic); /* Flushes the number of free clusters on FAT32 */ /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ @@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb) if (sbi->prev_free != -1) fsinfo->next_cluster = cpu_to_le32(sbi->prev_free); mark_buffer_dirty(bh); - if (sb->s_flags & MS_SYNCHRONOUS) - sync_dirty_buffer(bh); } brelse(bh); } @@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); } -EXPORT_SYMBOL(fat_date_unix2dos); +EXPORT_SYMBOL_GPL(fat_date_unix2dos); int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { @@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) return err; } -EXPORT_SYMBOL(fat_sync_bhs); +EXPORT_SYMBOL_GPL(fat_sync_bhs); diff --git a/fs/fcntl.c b/fs/fcntl.c index 863b46e0d78a..9903bde475f2 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -457,11 +457,11 @@ static void send_sigio_to_task(struct task_struct *p, else si.si_band = band_table[reason - POLL_IN]; si.si_fd = fd; - if (!send_group_sig_info(fown->signum, &si, p)) + if (!group_send_sig_info(fown->signum, &si, p)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: - send_group_sig_info(SIGIO, SEND_SIG_PRIV, p); + group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); } } @@ -495,7 +495,7 @@ static void send_sigurg_to_task(struct task_struct *p, struct fown_struct *fown) { if (sigio_perm(p, fown, SIGURG)) - send_group_sig_info(SIGURG, SEND_SIG_PRIV, p); + group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); } int send_sigurg(struct fown_struct *fown) diff --git a/fs/fifo.c b/fs/fifo.c index 5455916241f0..923371b753ab 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -35,7 +35,7 @@ static int fifo_open(struct inode *inode, struct file *filp) int ret; ret = -ERESTARTSYS; - if (down_interruptible(PIPE_SEM(*inode))) + if (mutex_lock_interruptible(PIPE_MUTEX(*inode))) goto err_nolock_nocleanup; if (!inode->i_pipe) { @@ -119,7 +119,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } /* Ok! */ - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; err_rd: @@ -139,7 +139,7 @@ err: free_pipe_info(inode); err_nocleanup: - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); err_nolock_nocleanup: return ret; diff --git a/fs/file_table.c b/fs/file_table.c index c3a5e2fd663b..6142250104a6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) + if (atomic_dec_and_test(&file->f_count)) __fput(file); } @@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!rcuref_inc_lf(&file->f_count)) { + if (!atomic_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (rcuref_inc_lf(&file->f_count)) + if (atomic_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) void put_filp(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) { + if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index d0401dc68d41..6f5df1700e95 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -99,8 +99,8 @@ static int vxfs_immed_readpage(struct file *fp, struct page *pp) { struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host); - u_int64_t offset = pp->index << PAGE_CACHE_SHIFT; - caddr_t kaddr; + u_int64_t offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT; + caddr_t kaddr; kaddr = kmap(pp); memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8f873e621f41..e08ab4702d97 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -148,6 +148,26 @@ void fuse_release_background(struct fuse_req *req) spin_unlock(&fuse_lock); } +static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + int i; + struct fuse_init_out *arg = &req->misc.init_out; + + if (arg->major != FUSE_KERNEL_VERSION) + fc->conn_error = 1; + else { + fc->minor = arg->minor; + fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + } + + /* After INIT reply is received other requests can go + out. So do (FUSE_MAX_OUTSTANDING - 1) number of + up()s on outstanding_sem. The last up() is done in + fuse_putback_request() */ + for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) + up(&fc->outstanding_sem); +} + /* * This function is called when a request is finished. Either a reply * has arrived or it was interrupted (and not yet sent) or some error @@ -172,19 +192,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) up_read(&fc->sbput_sem); } wake_up(&req->waitq); - if (req->in.h.opcode == FUSE_INIT) { - int i; - - if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) - fc->conn_error = 1; - - /* After INIT reply is received other requests can go - out. So do (FUSE_MAX_OUTSTANDING - 1) number of - up()s on outstanding_sem. The last up() is done in - fuse_putback_request() */ - for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) - up(&fc->outstanding_sem); - } else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { + if (req->in.h.opcode == FUSE_INIT) + process_init_reply(fc, req); + else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { /* Special case for failed iget in CREATE */ u64 nodeid = req->in.h.nodeid; __fuse_get_request(req); @@ -357,7 +367,7 @@ void fuse_send_init(struct fuse_conn *fc) /* This is called from fuse_read_super() so there's guaranteed to be a request available */ struct fuse_req *req = do_get_request(fc); - struct fuse_init_in_out *arg = &req->misc.init_in_out; + struct fuse_init_in *arg = &req->misc.init_in; arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; req->in.h.opcode = FUSE_INIT; @@ -365,8 +375,12 @@ void fuse_send_init(struct fuse_conn *fc) req->in.args[0].size = sizeof(*arg); req->in.args[0].value = arg; req->out.numargs = 1; - req->out.args[0].size = sizeof(*arg); - req->out.args[0].value = arg; + /* Variable length arguement used for backward compatibility + with interface version < 7.5. Rest of init_out is zeroed + by do_get_request(), so a short reply is not a problem */ + req->out.argvar = 1; + req->out.args[0].size = sizeof(struct fuse_init_out); + req->out.args[0].value = &req->misc.init_out; request_send_background(fc, req); } @@ -615,6 +629,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, struct fuse_copy_state cs; unsigned reqsize; + restart: spin_lock(&fuse_lock); fc = file->private_data; err = -EPERM; @@ -630,20 +645,25 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, req = list_entry(fc->pending.next, struct fuse_req, list); list_del_init(&req->list); - spin_unlock(&fuse_lock); in = &req->in; - reqsize = req->in.h.len; - fuse_copy_init(&cs, 1, req, iov, nr_segs); - err = -EINVAL; - if (iov_length(iov, nr_segs) >= reqsize) { - err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); - if (!err) - err = fuse_copy_args(&cs, in->numargs, in->argpages, - (struct fuse_arg *) in->args, 0); + reqsize = in->h.len; + /* If request is too large, reply with an error and restart the read */ + if (iov_length(iov, nr_segs) < reqsize) { + req->out.h.error = -EIO; + /* SETXATTR is special, since it may contain too large data */ + if (in->h.opcode == FUSE_SETXATTR) + req->out.h.error = -E2BIG; + request_end(fc, req); + goto restart; } + spin_unlock(&fuse_lock); + fuse_copy_init(&cs, 1, req, iov, nr_segs); + err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); + if (!err) + err = fuse_copy_args(&cs, in->numargs, in->argpages, + (struct fuse_arg *) in->args, 0); fuse_copy_finish(&cs); - spin_lock(&fuse_lock); req->locked = 0; if (!err && req->interrupted) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 51f5da652771..417bcee466f6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -13,8 +13,16 @@ #include <linux/gfp.h> #include <linux/sched.h> #include <linux/namei.h> -#include <linux/mount.h> +/* + * FUSE caches dentries and attributes with separate timeout. The + * time in jiffies until the dentry/attributes are valid is stored in + * dentry->d_time and fuse_inode->i_time respectively. + */ + +/* + * Calculate the time in jiffies until a dentry/attributes are valid + */ static inline unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) { @@ -22,6 +30,50 @@ static inline unsigned long time_to_jiffies(unsigned long sec, return jiffies + timespec_to_jiffies(&ts); } +/* + * Set dentry and possibly attribute timeouts from the lookup/mk* + * replies + */ +static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) +{ + entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); + if (entry->d_inode) + get_fuse_inode(entry->d_inode)->i_time = + time_to_jiffies(o->attr_valid, o->attr_valid_nsec); +} + +/* + * Mark the attributes as stale, so that at the next call to + * ->getattr() they will be fetched from userspace + */ +void fuse_invalidate_attr(struct inode *inode) +{ + get_fuse_inode(inode)->i_time = jiffies - 1; +} + +/* + * Just mark the entry as stale, so that a next attempt to look it up + * will result in a new lookup call to userspace + * + * This is called when a dentry is about to become negative and the + * timeout is unknown (unlink, rmdir, rename and in some cases + * lookup) + */ +static void fuse_invalidate_entry_cache(struct dentry *entry) +{ + entry->d_time = jiffies - 1; +} + +/* + * Same as fuse_invalidate_entry_cache(), but also try to remove the + * dentry from the hash + */ +static void fuse_invalidate_entry(struct dentry *entry) +{ + d_invalidate(entry); + fuse_invalidate_entry_cache(entry); +} + static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, struct dentry *entry, struct fuse_entry_out *outarg) @@ -37,17 +89,34 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, req->out.args[0].value = outarg; } +/* + * Check whether the dentry is still valid + * + * If the entry validity timeout has expired and the dentry is + * positive, try to redo the lookup. If the lookup results in a + * different inode, then let the VFS invalidate the dentry and redo + * the lookup once more. If the lookup results in the same inode, + * then refresh the attributes, timeouts and mark the dentry valid. + */ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) { - if (!entry->d_inode || is_bad_inode(entry->d_inode)) + struct inode *inode = entry->d_inode; + + if (inode && is_bad_inode(inode)) return 0; else if (time_after(jiffies, entry->d_time)) { int err; struct fuse_entry_out outarg; - struct inode *inode = entry->d_inode; - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); + struct fuse_conn *fc; + struct fuse_req *req; + + /* Doesn't hurt to "reset" the validity timeout */ + fuse_invalidate_entry_cache(entry); + if (!inode) + return 0; + + fc = get_fuse_conn(inode); + req = fuse_get_request(fc); if (!req) return 0; @@ -55,6 +124,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) request_send(fc, req); err = req->out.h.error; if (!err) { + struct fuse_inode *fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_send_forget(fc, req, outarg.nodeid, 1); return 0; @@ -66,18 +136,18 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) return 0; fuse_change_attributes(inode, &outarg.attr); - entry->d_time = time_to_jiffies(outarg.entry_valid, - outarg.entry_valid_nsec); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); + fuse_change_timeout(entry, &outarg); } return 1; } +/* + * Check if there's already a hashed alias of this directory inode. + * If yes, then lookup and mkdir must not create a new alias. + */ static int dir_alias(struct inode *inode) { if (S_ISDIR(inode->i_mode)) { - /* Don't allow creating an alias to a directory */ struct dentry *alias = d_find_alias(inode); if (alias) { dput(alias); @@ -96,8 +166,14 @@ static struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; -static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, - struct inode **inodep) +static inline int valid_mode(int m) +{ + return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || + S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + struct nameidata *nd) { int err; struct fuse_entry_out outarg; @@ -106,53 +182,49 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, struct fuse_req *req; if (entry->d_name.len > FUSE_NAME_MAX) - return -ENAMETOOLONG; + return ERR_PTR(-ENAMETOOLONG); req = fuse_get_request(fc); if (!req) - return -EINTR; + return ERR_PTR(-EINTR); fuse_lookup_init(req, dir, entry, &outarg); request_send(fc, req); err = req->out.h.error; - if (!err && invalid_nodeid(outarg.nodeid)) + if (!err && ((outarg.nodeid && invalid_nodeid(outarg.nodeid)) || + !valid_mode(outarg.attr.mode))) err = -EIO; - if (!err) { + if (!err && outarg.nodeid) { inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { fuse_send_forget(fc, req, outarg.nodeid, 1); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } } fuse_put_request(fc, req); if (err && err != -ENOENT) - return err; + return ERR_PTR(err); - if (inode) { - struct fuse_inode *fi = get_fuse_inode(inode); - entry->d_time = time_to_jiffies(outarg.entry_valid, - outarg.entry_valid_nsec); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); + if (inode && dir_alias(inode)) { + iput(inode); + return ERR_PTR(-EIO); } - + d_add(entry, inode); entry->d_op = &fuse_dentry_operations; - *inodep = inode; - return 0; -} - -void fuse_invalidate_attr(struct inode *inode) -{ - get_fuse_inode(inode)->i_time = jiffies - 1; -} - -static void fuse_invalidate_entry(struct dentry *entry) -{ - d_invalidate(entry); - entry->d_time = jiffies - 1; + if (!err) + fuse_change_timeout(entry, &outarg); + else + fuse_invalidate_entry_cache(entry); + return NULL; } +/* + * Atomic create+open operation + * + * If the filesystem doesn't support this, then fall back to separate + * 'mknod' + 'open' requests. + */ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct nameidata *nd) { @@ -163,7 +235,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_open_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; - struct fuse_inode *fi; struct fuse_file *ff; struct file *file; int flags = nd->intent.open.flags - 1; @@ -172,10 +243,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (fc->no_create) goto out; - err = -ENAMETOOLONG; - if (entry->d_name.len > FUSE_NAME_MAX) - goto out; - err = -EINTR; req = fuse_get_request(fc); if (!req) @@ -220,17 +287,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); ff->fh = outopen.fh; + /* Special release, with inode = NULL, this will + trigger a 'forget' request when the release is + complete */ fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0); goto out_put_request; } fuse_put_request(fc, req); - entry->d_time = time_to_jiffies(outentry.entry_valid, - outentry.entry_valid_nsec); - fi = get_fuse_inode(inode); - fi->i_time = time_to_jiffies(outentry.attr_valid, - outentry.attr_valid_nsec); - d_instantiate(entry, inode); + fuse_change_timeout(entry, &outentry); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; @@ -248,13 +313,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, return err; } +/* + * Code shared between mknod, mkdir, symlink and link + */ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct inode *dir, struct dentry *entry, int mode) { struct fuse_entry_out outarg; struct inode *inode; - struct fuse_inode *fi; int err; req->in.h.nodeid = get_node_id(dir); @@ -268,10 +335,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, fuse_put_request(fc, req); return err; } - if (invalid_nodeid(outarg.nodeid)) { - fuse_put_request(fc, req); - return -EIO; - } + err = -EIO; + if (invalid_nodeid(outarg.nodeid)) + goto out_put_request; + + if ((outarg.attr.mode ^ mode) & S_IFMT) + goto out_put_request; + inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { @@ -280,22 +350,19 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, } fuse_put_request(fc, req); - /* Don't allow userspace to do really stupid things... */ - if (((inode->i_mode ^ mode) & S_IFMT) || dir_alias(inode)) { + if (dir_alias(inode)) { iput(inode); return -EIO; } - entry->d_time = time_to_jiffies(outarg.entry_valid, - outarg.entry_valid_nsec); - - fi = get_fuse_inode(inode); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); - d_instantiate(entry, inode); + fuse_change_timeout(entry, &outarg); fuse_invalidate_attr(dir); return 0; + + out_put_request: + fuse_put_request(fc, req); + return err; } static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, @@ -355,12 +422,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req; - - if (len > FUSE_SYMLINK_MAX) - return -ENAMETOOLONG; - - req = fuse_get_request(fc); + struct fuse_req *req = fuse_get_request(fc); if (!req) return -EINTR; @@ -399,6 +461,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) inode->i_nlink = 0; fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); + fuse_invalidate_entry_cache(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -424,6 +487,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (!err) { entry->d_inode->i_nlink = 0; fuse_invalidate_attr(dir); + fuse_invalidate_entry_cache(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -459,6 +523,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, fuse_invalidate_attr(olddir); if (olddir != newdir) fuse_invalidate_attr(newdir); + + /* newent will end up negative */ + if (newent->d_inode) + fuse_invalidate_entry_cache(newent); } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation @@ -566,6 +634,15 @@ static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) return 0; } +/* + * Check whether the inode attributes are still valid + * + * If the attribute validity timeout has expired, then fetch the fresh + * attributes with a 'getattr' request + * + * I'm not sure why cached attributes are never returned for the root + * inode, this is probably being too cautious. + */ static int fuse_revalidate(struct dentry *entry) { struct inode *inode = entry->d_inode; @@ -613,6 +690,19 @@ static int fuse_access(struct inode *inode, int mask) return err; } +/* + * Check permission. The two basic access models of FUSE are: + * + * 1) Local access checking ('default_permissions' mount option) based + * on file mode. This is the plain old disk filesystem permission + * modell. + * + * 2) "Remote" access checking, where server is responsible for + * checking permission in each inode operation. An exception to this + * is if ->permission() was invoked from sys_access() in which case an + * access request is sent. Execute permission is still checked + * locally based on file mode. + */ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -631,14 +721,10 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) err = generic_permission(inode, mask, NULL); } - /* FIXME: Need some mechanism to revoke permissions: - currently if the filesystem suddenly changes the - file mode, we will not be informed about it, and - continue to allow access to the file/directory. - - This is actually not so grave, since the user can - simply keep access to the file/directory anyway by - keeping it open... */ + /* Note: the opposite of the above test does not + exist. So if permissions are revoked this won't be + noticed immediately, only after the attribute + timeout has expired */ return err; } else { @@ -691,7 +777,12 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct page *page; struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); + struct fuse_req *req; + + if (is_bad_inode(inode)) + return -EIO; + + req = fuse_get_request(fc); if (!req) return -EINTR; @@ -806,6 +897,15 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) } } +/* + * Set attributes, and at the same time refresh them. + * + * Truncation is slightly complicated, because the 'truncate' request + * may fail, in which case we don't want to touch the mapping. + * vmtruncate() doesn't allow for this case. So do the rlimit + * checking by hand and call vmtruncate() only after the file has + * actually been truncated. + */ static int fuse_setattr(struct dentry *entry, struct iattr *attr) { struct inode *inode = entry->d_inode; @@ -883,23 +983,6 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, return err; } -static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) -{ - struct inode *inode; - int err; - - err = fuse_lookup_iget(dir, entry, &inode); - if (err) - return ERR_PTR(err); - if (inode && dir_alias(inode)) { - iput(inode); - return ERR_PTR(-EIO); - } - d_add(entry, inode); - return NULL; -} - static int fuse_setxattr(struct dentry *entry, const char *name, const void *value, size_t size, int flags) { @@ -909,9 +992,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name, struct fuse_setxattr_in inarg; int err; - if (size > FUSE_XATTR_SIZE_MAX) - return -E2BIG; - if (fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2ca86141d13a..63d2980df5c9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -163,6 +163,9 @@ static int fuse_flush(struct file *file) struct fuse_flush_in inarg; int err; + if (is_bad_inode(inode)) + return -EIO; + if (fc->no_flush) return 0; @@ -199,6 +202,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, struct fuse_fsync_in inarg; int err; + if (is_bad_inode(inode)) + return -EIO; + if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) return 0; @@ -272,16 +278,22 @@ static int fuse_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT; - struct fuse_req *req = fuse_get_request(fc); - int err = -EINTR; + struct fuse_req *req; + int err; + + err = -EIO; + if (is_bad_inode(inode)) + goto out; + + err = -EINTR; + req = fuse_get_request(fc); if (!req) goto out; req->out.page_zeroing = 1; req->num_pages = 1; req->pages[0] = page; - fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE); + fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE); err = req->out.h.error; fuse_put_request(fc, req); if (!err) @@ -295,7 +307,7 @@ static int fuse_readpage(struct file *file, struct page *page) static int fuse_send_readpages(struct fuse_req *req, struct file *file, struct inode *inode) { - loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT; + loff_t pos = page_offset(req->pages[0]); size_t count = req->num_pages << PAGE_CACHE_SHIFT; unsigned i; req->out.page_zeroing = 1; @@ -345,6 +357,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_readpages_data data; int err; + + if (is_bad_inode(inode)) + return -EIO; + data.file = file; data.inode = inode; data.req = fuse_get_request(fc); @@ -402,8 +418,13 @@ static int fuse_commit_write(struct file *file, struct page *page, unsigned count = to - offset; struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset; - struct fuse_req *req = fuse_get_request(fc); + loff_t pos = page_offset(page) + offset; + struct fuse_req *req; + + if (is_bad_inode(inode)) + return -EIO; + + req = fuse_get_request(fc); if (!req) return -EINTR; @@ -454,7 +475,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = min(npages, FUSE_MAX_PAGES_PER_REQ); + npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); down_read(¤t->mm->mmap_sem); npages = get_user_pages(current, current->mm, user_addr, npages, write, 0, req->pages, NULL); @@ -475,12 +496,16 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; ssize_t res = 0; - struct fuse_req *req = fuse_get_request(fc); + struct fuse_req *req; + + if (is_bad_inode(inode)) + return -EIO; + + req = fuse_get_request(fc); if (!req) return -EINTR; while (count) { - size_t tmp; size_t nres; size_t nbytes = min(count, nmax); int err = fuse_get_user_pages(req, buf, nbytes, !write); @@ -488,8 +513,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, res = err; break; } - tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset; - nbytes = min(nbytes, tmp); + nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; + nbytes = min(count, nbytes); if (write) nres = fuse_send_write(req, file, inode, pos, nbytes); else @@ -535,9 +560,9 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, struct inode *inode = file->f_dentry->d_inode; ssize_t res; /* Don't allow parallel writes to the same file */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); res = fuse_direct_io(file, buf, count, ppos, 1); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0ea5301f86be..74c8d098a14a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -21,6 +21,9 @@ /** If more requests are outstanding, then the operation will block */ #define FUSE_MAX_OUTSTANDING 10 +/** It could be as large as PATH_MAX, but would that have any uses? */ +#define FUSE_NAME_MAX 1024 + /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. Otherwise no permission checking is done in the kernel */ @@ -108,9 +111,6 @@ struct fuse_out { struct fuse_arg args[3]; }; -struct fuse_req; -struct fuse_conn; - /** * A request to the client */ @@ -159,7 +159,8 @@ struct fuse_req { union { struct fuse_forget_in forget_in; struct fuse_release_in release_in; - struct fuse_init_in_out init_in_out; + struct fuse_init_in init_in; + struct fuse_init_out init_out; } misc; /** page vector */ @@ -272,6 +273,9 @@ struct fuse_conn { /** Is create not implemented by fs? */ unsigned no_create : 1; + /** Negotiated minor version */ + unsigned minor; + /** Backing dev info */ struct backing_dev_info bdi; }; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e69a546844d0..04c80cc957a3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -135,12 +135,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) fuse_init_common(inode); init_special_inode(inode, inode->i_mode, new_decode_dev(attr->rdev)); - } else { - /* Don't let user create weird files */ - inode->i_mode = S_IFREG; - fuse_init_common(inode); - fuse_init_file_inode(inode); - } + } else + BUG(); } static int fuse_inode_eq(struct inode *inode, void *_nodeidp) @@ -218,6 +214,7 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr { stbuf->f_type = FUSE_SUPER_MAGIC; stbuf->f_bsize = attr->bsize; + stbuf->f_frsize = attr->frsize; stbuf->f_blocks = attr->blocks; stbuf->f_bfree = attr->bfree; stbuf->f_bavail = attr->bavail; @@ -238,10 +235,12 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) if (!req) return -EINTR; + memset(&outarg, 0, sizeof(outarg)); req->in.numargs = 0; req->in.h.opcode = FUSE_STATFS; req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); + req->out.args[0].size = + fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; @@ -482,7 +481,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->max_read = d.max_read; if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; - fc->max_write = FUSE_MAX_IN / 2; err = -ENOMEM; root = get_root_inode(sb, d.rootmode); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d499393a8ae7..050a49276499 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -547,13 +547,13 @@ static int hfs_file_release(struct inode *inode, struct file *file) if (atomic_read(&file->f_count) != 0) return 0; if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); hfs_file_truncate(inode); //if (inode->i_flags & S_DEAD) { // hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); // hfs_delete_inode(inode); //} - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return 0; } diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index c7d316455fa0..9fb51632303c 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -29,7 +29,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma return size; dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); - down(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, (filler_t *)mapping->a_ops->readpage, NULL); @@ -143,7 +143,7 @@ done: sb->s_dirt = 1; dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); out: - up(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); return start; } @@ -164,7 +164,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) if ((offset + count) > HFSPLUS_SB(sb).total_blocks) return -2; - down(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; pnr = offset / PAGE_CACHE_BITS; page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL); @@ -215,7 +215,7 @@ out: kunmap(page); HFSPLUS_SB(sb).free_blocks += len; sb->s_dirt = 1; - up(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); return 0; } diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index df16fcbff3fb..0fa1ab6250bf 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -143,9 +143,6 @@ struct hfsplus_sb_info { unsigned long flags; - atomic_t inode_cnt; - u32 last_inode_cnt; - struct hlist_head rsrc_inodes; }; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fc98583cf045..7acff6c5464f 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -182,11 +182,6 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent igrab(dir); hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); mark_inode_dirty(inode); - { - void hfsplus_inode_check(struct super_block *sb); - atomic_inc(&HFSPLUS_SB(sb).inode_cnt); - hfsplus_inode_check(sb); - } out: d_add(dentry, inode); return NULL; @@ -276,13 +271,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) if (atomic_read(&file->f_count) != 0) return 0; if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); hfsplus_file_truncate(inode); if (inode->i_flags & S_DEAD) { hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); hfsplus_delete_inode(inode); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return 0; } @@ -317,11 +312,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) if (!inode) return NULL; - { - void hfsplus_inode_check(struct super_block *sb); - atomic_inc(&HFSPLUS_SB(sb).inode_cnt); - hfsplus_inode_check(sb); - } inode->i_ino = HFSPLUS_SB(sb).next_cnid++; inode->i_mode = mode; inode->i_uid = current->fsuid; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 8093351bd7c3..d791780def50 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -22,29 +22,12 @@ static void hfsplus_destroy_inode(struct inode *inode); #include "hfsplus_fs.h" -void hfsplus_inode_check(struct super_block *sb) -{ -#if 0 - u32 cnt = atomic_read(&HFSPLUS_SB(sb).inode_cnt); - u32 last_cnt = HFSPLUS_SB(sb).last_inode_cnt; - - if (cnt <= (last_cnt / 2) || - cnt >= (last_cnt * 2)) { - HFSPLUS_SB(sb).last_inode_cnt = cnt; - printk("inode_check: %u,%u,%u\n", cnt, last_cnt, - HFSPLUS_SB(sb).cat_tree ? HFSPLUS_SB(sb).cat_tree->node_hash_cnt : 0); - } -#endif -} - static void hfsplus_read_inode(struct inode *inode) { struct hfs_find_data fd; struct hfsplus_vh *vhdr; int err; - atomic_inc(&HFSPLUS_SB(inode->i_sb).inode_cnt); - hfsplus_inode_check(inode->i_sb); INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); init_MUTEX(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).flags = 0; @@ -155,12 +138,10 @@ static int hfsplus_write_inode(struct inode *inode, int unused) static void hfsplus_clear_inode(struct inode *inode) { dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino); - atomic_dec(&HFSPLUS_SB(inode->i_sb).inode_cnt); if (HFSPLUS_IS_RSRC(inode)) { HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; iput(HFSPLUS_I(inode).rsrc_inode); } - hfsplus_inode_check(inode->i_sb); } static void hfsplus_write_super(struct super_block *sb) @@ -320,7 +301,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) /* temporarily use utf8 to correctly find the hidden dir below */ nls = sbi->nls; sbi->nls = load_nls("utf8"); - if (!nls) { + if (!sbi->nls) { printk("HFS+: unable to load nls for utf8\n"); err = -EINVAL; goto cleanup; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 0217c3a04441..5591f9623aa2 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -32,19 +32,19 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) /*printk("dir lseek\n");*/ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; - down(&i->i_sem); + mutex_lock(&i->i_mutex); pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; while (pos != new_off) { if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); else goto fail; if (pos == 12) goto fail; } - up(&i->i_sem); + mutex_unlock(&i->i_mutex); ok: unlock_kernel(); return filp->f_pos = new_off; fail: - up(&i->i_sem); + mutex_unlock(&i->i_mutex); /*printk("illegal lseek: %016llx\n", new_off);*/ unlock_kernel(); return -ESPIPE; diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 52930915bad8..a44dc5897399 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -171,12 +171,12 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, err = -ENOMEM; parent = HPPFS_I(ino)->proc_dentry; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); proc_dentry = d_lookup(parent, &dentry->d_name); if(proc_dentry == NULL){ proc_dentry = d_alloc(parent, &dentry->d_name); if(proc_dentry == NULL){ - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); goto out; } new = (*parent->d_inode->i_op->lookup)(parent->d_inode, @@ -186,7 +186,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, proc_dentry = new; } } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); if(IS_ERR(proc_dentry)) return(proc_dentry); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8c1cef3bb677..ff1b7d108bd0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -100,9 +100,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; - if ((vma->vm_flags & (VM_MAYSHARE | VM_WRITE)) == VM_WRITE) - return -EINVAL; - if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) return -EINVAL; @@ -121,7 +118,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma_len = (loff_t)(vma->vm_end - vma->vm_start); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); file_accessed(file); vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; @@ -136,7 +133,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (inode->i_size < len) inode->i_size = len; out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/inode.c b/fs/inode.c index d8d04bd72b59..108138d4e909 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -22,6 +22,7 @@ #include <linux/cdev.h> #include <linux/bootmem.h> #include <linux/inotify.h> +#include <linux/mount.h> /* * This is needed for the following functions: @@ -192,7 +193,7 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); - sema_init(&inode->i_sem, 1); + mutex_init(&inode->i_mutex); init_rwsem(&inode->i_alloc_sem); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); rwlock_init(&inode->i_data.tree_lock); @@ -770,7 +771,7 @@ EXPORT_SYMBOL(igrab); * * Note, @test is called with the inode_lock held, so can't sleep. */ -static inline struct inode *ifind(struct super_block *sb, +static struct inode *ifind(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data, const int wait) { @@ -804,7 +805,7 @@ static inline struct inode *ifind(struct super_block *sb, * * Otherwise NULL is returned. */ -static inline struct inode *ifind_fast(struct super_block *sb, +static struct inode *ifind_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { struct inode *inode; @@ -1176,22 +1177,33 @@ sector_t bmap(struct inode * inode, sector_t block) EXPORT_SYMBOL(bmap); /** - * update_atime - update the access time + * touch_atime - update the access time + * @mnt: mount the inode is accessed on * @inode: inode accessed * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ -void update_atime(struct inode *inode) +void touch_atime(struct vfsmount *mnt, struct dentry *dentry) { + struct inode *inode = dentry->d_inode; struct timespec now; - if (IS_NOATIME(inode)) + if (IS_RDONLY(inode)) return; - if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) + + if ((inode->i_flags & S_NOATIME) || + (inode->i_sb->s_flags & MS_NOATIME) || + ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) return; - if (IS_RDONLY(inode)) + + /* + * We may have a NULL vfsmount when coming from NFSD + */ + if (mnt && + ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))) return; now = current_fs_time(inode->i_sb); @@ -1201,19 +1213,23 @@ void update_atime(struct inode *inode) } } -EXPORT_SYMBOL(update_atime); +EXPORT_SYMBOL(touch_atime); /** - * inode_update_time - update mtime and ctime time - * @inode: inode accessed - * @ctime_too: update ctime too + * file_update_time - update mtime and ctime time + * @file: file accessed * - * Update the mtime time on an inode and mark it for writeback. - * When ctime_too is specified update the ctime too. + * Update the mtime and ctime members of an inode and mark the inode + * for writeback. Note that this function is meant exclusively for + * usage in the file write path of filesystems, and filesystems may + * choose to explicitly ignore update via this function with the + * S_NOCTIME inode flag, e.g. for network filesystem where these + * timestamps are handled by the server. */ -void inode_update_time(struct inode *inode, int ctime_too) +void file_update_time(struct file *file) { + struct inode *inode = file->f_dentry->d_inode; struct timespec now; int sync_it = 0; @@ -1227,16 +1243,15 @@ void inode_update_time(struct inode *inode, int ctime_too) sync_it = 1; inode->i_mtime = now; - if (ctime_too) { - if (!timespec_equal(&inode->i_ctime, &now)) - sync_it = 1; - inode->i_ctime = now; - } + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it = 1; + inode->i_ctime = now; + if (sync_it) mark_inode_dirty_sync(inode); } -EXPORT_SYMBOL(inode_update_time); +EXPORT_SYMBOL(file_update_time); int inode_needs_sync(struct inode *inode) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 014a51fd00d7..cb3cef525c3b 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -24,29 +24,75 @@ #include <linux/slab.h> /* - * Unlink a buffer from a transaction. + * Unlink a buffer from a transaction checkpoint list. * * Called with j_list_lock held. */ -static inline void __buffer_unlink(struct journal_head *jh) +static void __buffer_unlink_first(struct journal_head *jh) { transaction_t *transaction; transaction = jh->b_cp_transaction; - jh->b_cp_transaction = NULL; jh->b_cpnext->b_cpprev = jh->b_cpprev; jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) + if (transaction->t_checkpoint_list == jh) { transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; + } +} + +/* + * Unlink a buffer from a transaction checkpoint(io) list. + * + * Called with j_list_lock held. + */ + +static inline void __buffer_unlink(struct journal_head *jh) +{ + transaction_t *transaction; + + transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + if (transaction->t_checkpoint_io_list == jh) { + transaction->t_checkpoint_io_list = jh->b_cpnext; + if (transaction->t_checkpoint_io_list == jh) + transaction->t_checkpoint_io_list = NULL; + } +} + +/* + * Move a buffer from the checkpoint list to the checkpoint io list + * + * Called with j_list_lock held + */ + +static inline void __buffer_relink_io(struct journal_head *jh) +{ + transaction_t *transaction; + + transaction = jh->b_cp_transaction; + __buffer_unlink_first(jh); + + if (!transaction->t_checkpoint_io_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_io_list; + jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_io_list = jh; } /* * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it. + * Returns 1 if we released it and 2 if we also released the + * whole transaction. + * * Requires j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ @@ -57,12 +103,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - __journal_remove_checkpoint(jh); + ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); - ret = 1; } else { jbd_unlock_bh_state(bh); } @@ -117,83 +162,53 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) } /* - * Clean up a transaction's checkpoint list. - * - * We wait for any pending IO to complete and make sure any clean - * buffers are removed from the transaction. - * - * Return 1 if we performed any actions which might have destroyed the - * checkpoint. (journal_remove_checkpoint() deletes the transaction when - * the last checkpoint buffer is cleansed) + * Clean up transaction's list of buffers submitted for io. + * We wait for any pending IO to complete and remove any clean + * buffers. Note that we take the buffers in the opposite ordering + * from the one in which they were submitted for IO. * * Called with j_list_lock held. */ -static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) + +static void __wait_cp_io(journal_t *journal, transaction_t *transaction) { - struct journal_head *jh, *next_jh, *last_jh; + struct journal_head *jh; struct buffer_head *bh; - int ret = 0; - - assert_spin_locked(&journal->j_list_lock); - jh = transaction->t_checkpoint_list; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - next_jh = jh; - do { - jh = next_jh; + tid_t this_tid; + int released = 0; + + this_tid = transaction->t_tid; +restart: + /* Didn't somebody clean up the transaction in the meanwhile */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + return; + while (!released && transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); + goto restart; + } if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); - goto out_return_1; - } - - /* - * This is foul - */ - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - goto out_return_1; + spin_lock(&journal->j_list_lock); + goto restart; } - - if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - goto out_return_1; - } - /* - * AKPM: I think the buffer_jbddirty test is redundant - it - * shouldn't have NULL b_transaction? + * Now in whatever state the buffer currently is, we know that + * it has been written out and so we can drop it from the list */ - next_jh = jh->b_cpnext; - if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - __brelse(bh); - ret = 1; - } else { - jbd_unlock_bh_state(bh); - } - } while (jh != last_jh); - - return ret; -out_return_1: - spin_lock(&journal->j_list_lock); - return 1; + released = __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + } } #define NR_BATCH 64 @@ -203,9 +218,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - spin_unlock(&journal->j_list_lock); ll_rw_block(SWRITE, *batch_count, bhs); - spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); @@ -221,19 +234,46 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. * - * Called with j_list_lock held. + * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ -static int __flush_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - int *drop_count) +static int __process_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; - if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { - J_ASSERT_JH(jh, jh->b_transaction == NULL); + if (buffer_locked(bh)) { + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + put_bh(bh); + ret = 1; + } + else if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + ret = 1; + } + else if (!buffer_dirty(bh)) { + J_ASSERT_JH(jh, !buffer_jbddirty(bh)); + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + put_bh(bh); + ret = 1; + } + else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. @@ -246,45 +286,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); bhs[*batch_count] = bh; + __buffer_relink_io(jh); jbd_unlock_bh_state(bh); (*batch_count)++; if (*batch_count == NR_BATCH) { + spin_unlock(&journal->j_list_lock); __flush_batch(journal, bhs, batch_count); ret = 1; } - } else { - int last_buffer = 0; - if (jh->b_cpnext == jh) { - /* We may be about to drop the transaction. Tell the - * caller that the lists have changed. - */ - last_buffer = 1; - } - if (__try_to_free_cp_buf(jh)) { - (*drop_count)++; - ret = last_buffer; - } } return ret; } /* - * Perform an actual checkpoint. We don't write out only enough to - * satisfy the current blocked requests: rather we submit a reasonably - * sized chunk of the outstanding data to disk at once for - * efficiency. __log_wait_for_space() will retry if we didn't free enough. + * Perform an actual checkpoint. We take the first transaction on the + * list of transactions to be checkpointed and send all its buffers + * to disk. We submit larger chunks of data at once. * - * However, we _do_ take into account the amount requested so that once - * the IO has been queued, we can return as soon as enough of it has - * completed to disk. - * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) { + transaction_t *transaction; + tid_t this_tid; int result; - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; jbd_debug(1, "Start checkpoint\n"); @@ -299,79 +324,70 @@ int log_do_checkpoint(journal_t *journal) return result; /* - * OK, we need to start writing disk blocks. Try to free up a - * quarter of the log in a single checkpoint if we can. + * OK, we need to start writing disk blocks. Take one transaction + * and write it. */ + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; +restart: /* - * AKPM: check this code. I had a feeling a while back that it - * degenerates into a busy loop at unmount time. + * If someone cleaned up this transaction while we slept, we're + * done (maybe it's a new transaction, but it fell at the same + * address). */ - spin_lock(&journal->j_list_lock); - while (journal->j_checkpoint_transactions) { - transaction_t *transaction; - struct journal_head *jh, *last_jh, *next_jh; - int drop_count = 0; - int cleanup_ret, retry = 0; - tid_t this_tid; - - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; - jh = transaction->t_checkpoint_list; - last_jh = jh->b_cpprev; - next_jh = jh; - do { + if (journal->j_checkpoint_transactions == transaction || + transaction->t_tid == this_tid) { + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + struct journal_head *jh; + int retry = 0; + + while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; - jh = next_jh; - next_jh = jh->b_cpnext; + jh = transaction->t_checkpoint_list; bh = jh2bh(jh); if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); retry = 1; break; } - retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); - if (cond_resched_lock(&journal->j_list_lock)) { + retry = __process_buffer(journal, jh, bhs, + &batch_count); + if (!retry && + lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); retry = 1; break; } - } while (jh != last_jh && !retry); + } if (batch_count) { + if (!retry) { + spin_unlock(&journal->j_list_lock); + retry = 1; + } __flush_batch(journal, bhs, &batch_count); - retry = 1; } + if (retry) { + spin_lock(&journal->j_list_lock); + goto restart; + } /* - * If someone cleaned up this transaction while we slept, we're - * done - */ - if (journal->j_checkpoint_transactions != transaction) - break; - if (retry) - continue; - /* - * Maybe it's a new transaction, but it fell at the same - * address - */ - if (transaction->t_tid != this_tid) - continue; - /* - * We have walked the whole transaction list without - * finding anything to write to disk. We had better be - * able to make some progress or we are in trouble. + * Now we have cleaned up the first transaction's checkpoint + * list. Let's clean up the second one. */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); - if (journal->j_checkpoint_transactions != transaction) - break; + __wait_cp_io(journal, transaction); } +out: spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) return result; - return 0; } @@ -456,52 +472,91 @@ int cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ /* + * journal_clean_one_cp_list + * + * Find all the written-back checkpoint buffers in the given list and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of bufers reaped (for debug) + */ + +static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +{ + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + int ret, freed = 0; + + *released = 0; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranking */ + if (jbd_trylock_bh_state(jh2bh(jh))) { + ret = __try_to_free_cp_buf(jh); + if (ret) { + freed++; + if (ret == 2) { + *released = 1; + return freed; + } + } + } + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. Bail out if + * preemption requested: + */ + if (need_resched()) + return freed; + } while (jh != last_jh); + + return freed; +} + +/* * journal_clean_checkpoint_list * * Find all the written-back checkpoint buffers in the journal and release them. * * Called with the journal locked. * Called with j_list_lock held. - * Returns number of bufers reaped (for debug) + * Returns number of buffers reaped (for debug) */ int __journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; + int ret = 0, released; transaction = journal->j_checkpoint_transactions; - if (transaction == 0) + if (!transaction) goto out; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { - struct journal_head *jh; - transaction = next_transaction; next_transaction = transaction->t_cpnext; - jh = transaction->t_checkpoint_list; - if (jh) { - struct journal_head *last_jh = jh->b_cpprev; - struct journal_head *next_jh = jh; - - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranknig */ - if (jbd_trylock_bh_state(jh2bh(jh))) - ret += __try_to_free_cp_buf(jh); - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - goto out; - } while (jh != last_jh); - } + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_list, &released); + if (need_resched()) + goto out; + if (released) + continue; + /* + * It is essential that we are as careful as in the case of + * t_checkpoint_list with removing the buffer from the list as + * we can possibly see not yet submitted buffers on io_list + */ + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list, &released); + if (need_resched()) + goto out; } while (transaction != last_transaction); out: return ret; @@ -516,18 +571,22 @@ out: * buffer updates committed in that transaction have safely been stored * elsewhere on disk. To achieve this, all of the buffers in a * transaction need to be maintained on the transaction's checkpoint - * list until they have been rewritten, at which point this function is + * lists until they have been rewritten, at which point this function is * called to remove the buffer from the existing transaction's - * checkpoint list. + * checkpoint lists. + * + * The function returns 1 if it frees the transaction, 0 otherwise. * * This function is called with the journal locked. * This function is called with j_list_lock held. + * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -void __journal_remove_checkpoint(struct journal_head *jh) +int __journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; + int ret = 0; JBUFFER_TRACE(jh, "entry"); @@ -538,8 +597,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) journal = transaction->t_journal; __buffer_unlink(jh); + jh->b_cp_transaction = NULL; - if (transaction->t_checkpoint_list != NULL) + if (transaction->t_checkpoint_list != NULL || + transaction->t_checkpoint_io_list != NULL) goto out; JBUFFER_TRACE(jh, "transaction has no more buffers"); @@ -565,8 +626,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) /* Just in case anybody was waiting for more transactions to be checkpointed... */ wake_up(&journal->j_wait_logspace); + ret = 1; out: JBUFFER_TRACE(jh, "exit"); + return ret; } /* @@ -628,6 +691,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); + J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_updates == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 3dcc6d2162cb..fc3855a1aef3 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page) read_len = 0; result = 0; - offset = page->index << PAGE_CACHE_SHIFT; + offset = page_offset(page); kmap(page); buf = page_address(page); @@ -1415,7 +1415,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count, * This will never trigger with sane page sizes. leave it in * anyway, since I'm thinking about how to merge larger writes * (the current idea is to poke a thread that does the actual - * I/O and starts by doing a down(&inode->i_sem). then we + * I/O and starts by doing a mutex_lock(&inode->i_mutex). then we * would need to get the page cache pages and have a list of * I/O requests and do write-merging here. * -- prumpf @@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page, { void *addr = page_address(page) + from; /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */ - loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from; + loff_t pos = page_offset(page) + from; return jffs_file_write(filp, addr, to-from, &pos); } /* jffs_commit_write() */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 68000a50ceb6..2967b7393415 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap) /* * write out dirty pages of bmap */ - filemap_fdatawrite(ipbmap->i_mapping); - filemap_fdatawait(ipbmap->i_mapping); + filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 28201b194f53..31b4aa13dd4b 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -265,8 +265,7 @@ int diSync(struct inode *ipimap) /* * write out dirty pages of imap */ - filemap_fdatawrite(ipimap->i_mapping); - filemap_fdatawait(ipimap->i_mapping); + filemap_write_and_wait(ipimap->i_mapping); diWriteSpecial(ipimap, 0); @@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip) jfs_err("diFreeSpecial called with NULL ip!"); return; } - filemap_fdatawrite(ip->i_mapping); - filemap_fdatawait(ip->i_mapping); + filemap_write_and_wait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, 0); iput(ip); } diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index c0fd7b3eadc6..dc21a5bd54d4 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -58,7 +58,7 @@ struct jfs_inode_info { /* * rdwrlock serializes xtree between reads & writes and synchronizes * changes to special inodes. It's use would be redundant on - * directories since the i_sem taken in the VFS is sufficient. + * directories since the i_mutex taken in the VFS is sufficient. */ struct rw_semaphore rdwrlock; /* @@ -68,7 +68,7 @@ struct jfs_inode_info { * inode is blocked in txBegin or TxBeginAnon */ struct semaphore commit_sem; - /* xattr_sem allows us to access the xattrs without taking i_sem */ + /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ #ifdef CONFIG_JFS_POSIX_ACL diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index b660c93c92de..2ddb6b892bcf 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1231,10 +1231,8 @@ int txCommit(tid_t tid, /* transaction identifier */ * when we don't need to worry about it at all. * * if ((!S_ISDIR(ip->i_mode)) - * && (tblk->flag & COMMIT_DELETE) == 0) { - * filemap_fdatawrite(ip->i_mapping); - * filemap_fdatawait(ip->i_mapping); - * } + * && (tblk->flag & COMMIT_DELETE) == 0) + * filemap_write_and_wait(ip->i_mapping); */ /* diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 5cf91785b541..21eaf7ac0fcb 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb) * Make sure all metadata makes it to disk before we mark * the superblock as clean */ - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their @@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb) * mark the superblock clean before everything is flushed to * disk. */ - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index c6dc254d3253..45180361871c 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * by txCommit(); */ filemap_fdatawait(ipbmap->i_mapping); - filemap_fdatawrite(ipbmap->i_mapping); - filemap_fdatawait(ipbmap->i_mapping); + filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); newPage = nPages; /* first new page number */ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4226af3ea91b..8d31f1336431 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -502,8 +502,7 @@ out_no_rw: jfs_err("jfs_umount failed with return code %d", rc); } out_mount_failed: - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); make_bad_inode(sbi->direct_inode); iput(sbi->direct_inode); diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 23aa5066b5a4..952da5f917cd 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -83,21 +83,6 @@ struct ea_buffer { #define EA_NEW 0x0004 #define EA_MALLOC 0x0008 -/* Namespaces */ -#define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) - -#define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) - -#define XATTR_OS2_PREFIX "os2." -#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) - -/* XATTR_SECURITY_PREFIX is defined in include/linux/xattr.h */ -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) - -#define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) /* * These three routines are used to recognize on-disk extended attributes @@ -773,36 +758,23 @@ static int can_set_system_xattr(struct inode *inode, const char *name, static int can_set_xattr(struct inode *inode, const char *name, const void *value, size_t value_len) { - if (IS_RDONLY(inode)) - return -EROFS; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - - if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) - /* - * "system.*" - */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return can_set_system_xattr(inode, name, value, value_len); - if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); - -#ifdef CONFIG_JFS_SECURITY - if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) - == 0) - return 0; /* Leave it to the security module */ -#endif - - if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) && - (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0)) + /* + * Don't allow setting an attribute in an unknown namespace. + */ + if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && + strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) return -EOPNOTSUPP; if (!S_ISREG(inode->i_mode) && (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX)) return -EPERM; - return permission(inode, MAY_WRITE, NULL); + return 0; } int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, @@ -972,22 +944,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, return rc; } -static int can_get_xattr(struct inode *inode, const char *name) -{ -#ifdef CONFIG_JFS_SECURITY - if(strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) - return 0; -#endif - - if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); - - if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) - return 0; - - return permission(inode, MAY_READ, NULL); -} - ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, size_t buf_size) { @@ -998,12 +954,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, ssize_t size; int namelen = strlen(name); char *os2name = NULL; - int rc; char *value; - if ((rc = can_get_xattr(inode, name))) - return rc; - if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1, GFP_KERNEL); diff --git a/fs/libfs.c b/fs/libfs.c index 58101dff2c66..63c020e6589e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -74,7 +74,7 @@ int dcache_dir_close(struct inode *inode, struct file *file) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - down(&file->f_dentry->d_inode->i_sem); + mutex_lock(&file->f_dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -82,7 +82,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) if (offset >= 0) break; default: - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -93,20 +93,20 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) loff_t n = file->f_pos - 2; spin_lock(&dcache_lock); - list_del(&cursor->d_child); + list_del(&cursor->d_u.d_child); p = file->f_dentry->d_subdirs.next; while (n && p != &file->f_dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_child); + next = list_entry(p, struct dentry, d_u.d_child); if (!d_unhashed(next) && next->d_inode) n--; p = p->next; } - list_add_tail(&cursor->d_child, p); + list_add_tail(&cursor->d_u.d_child, p); spin_unlock(&dcache_lock); } } - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return offset; } @@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p, *q = &cursor->d_u.d_child; ino_t ino; int i = filp->f_pos; @@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) } for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; - next = list_entry(p, struct dentry, d_child); + next = list_entry(p, struct dentry, d_u.d_child); if (d_unhashed(next) || !next->d_inode) continue; @@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_child) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) if (simple_positive(child)) goto out; ret = 1; @@ -356,7 +356,7 @@ int simple_commit_write(struct file *file, struct page *page, /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold the i_sem. + * cannot change under us because we hold the i_mutex. */ if (pos > inode->i_size) i_size_write(inode, pos); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index c5a33648e9fd..145524039577 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -26,11 +26,12 @@ static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); static int nlmclnt_unlock(struct nlm_rqst *, struct file_lock *); -static void nlmclnt_unlock_callback(struct rpc_task *); -static void nlmclnt_cancel_callback(struct rpc_task *); static int nlm_stat_to_errno(u32 stat); static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host); +static const struct rpc_call_ops nlmclnt_unlock_ops; +static const struct rpc_call_ops nlmclnt_cancel_ops; + /* * Cookie counter for NLM requests */ @@ -221,8 +222,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) goto done; } clnt->cl_softrtry = nfssrv->client->cl_softrtry; - clnt->cl_intr = nfssrv->client->cl_intr; - clnt->cl_chatty = nfssrv->client->cl_chatty; + clnt->cl_intr = nfssrv->client->cl_intr; } /* Keep the old signal mask */ @@ -399,8 +399,7 @@ in_grace_period: /* * Generic NLM call, async version. */ -int -nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -419,13 +418,12 @@ nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) msg.rpc_proc = &clnt->cl_procinfo[proc]; /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); return status; } -static int -nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -448,7 +446,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) /* Increment host refcount */ nlm_get_host(host); /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); if (status < 0) nlm_release_host(host); return status; @@ -664,7 +662,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) if (req->a_flags & RPC_TASK_ASYNC) { status = nlmclnt_async_call(req, NLMPROC_UNLOCK, - nlmclnt_unlock_callback); + &nlmclnt_unlock_ops); /* Hrmf... Do the unlock early since locks_remove_posix() * really expects us to free the lock synchronously */ do_vfs_lock(fl); @@ -692,10 +690,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) return -ENOLCK; } -static void -nlmclnt_unlock_callback(struct rpc_task *task) +static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) { - struct nlm_rqst *req = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *req = data; int status = req->a_res.status; if (RPC_ASSASSINATED(task)) @@ -722,6 +719,10 @@ die: rpc_restart_call(task); } +static const struct rpc_call_ops nlmclnt_unlock_ops = { + .rpc_call_done = nlmclnt_unlock_callback, +}; + /* * Cancel a blocked lock request. * We always use an async RPC call for this in order not to hang a @@ -750,8 +751,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) nlmclnt_setlockargs(req, fl); - status = nlmclnt_async_call(req, NLMPROC_CANCEL, - nlmclnt_cancel_callback); + status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); if (status < 0) { nlmclnt_release_lockargs(req); kfree(req); @@ -765,10 +765,9 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) return status; } -static void -nlmclnt_cancel_callback(struct rpc_task *task) +static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) { - struct nlm_rqst *req = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *req = data; if (RPC_ASSASSINATED(task)) goto die; @@ -807,6 +806,10 @@ retry_cancel: rpc_delay(task, 30 * HZ); } +static const struct rpc_call_ops nlmclnt_cancel_ops = { + .rpc_call_done = nlmclnt_cancel_callback, +}; + /* * Convert an NLM status code to a generic kernel errno */ diff --git a/fs/lockd/host.c b/fs/lockd/host.c index c4c8601096e0..82f7a0b1d8ae 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -177,7 +177,7 @@ nlm_bind_host(struct nlm_host *host) if ((clnt = host->h_rpcclnt) != NULL) { xprt = clnt->cl_xprt; if (time_after_eq(jiffies, host->h_nextrebind)) { - clnt->cl_port = 0; + rpc_force_rebind(clnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; dprintk("lockd: next rebind in %ld jiffies\n", host->h_nextrebind - jiffies); @@ -217,7 +217,7 @@ nlm_rebind_host(struct nlm_host *host) { dprintk("lockd: rebind host %s\n", host->h_name); if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) { - host->h_rpcclnt->cl_port = 0; + rpc_force_rebind(host->h_rpcclnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; } } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 2d144abe84ad..0edc03e67966 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -123,7 +123,6 @@ nsm_create(void) if (IS_ERR(clnt)) goto out_err; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clnt->cl_oneshot = 1; return clnt; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 12a857c29e25..71a30b416d1a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -178,6 +178,8 @@ lockd(struct svc_rqst *rqstp) } + flush_signals(current); + /* * Check whether there's a new lockd process before * shutting down the hosts and clearing the slot. @@ -192,8 +194,6 @@ lockd(struct svc_rqst *rqstp) "lockd: new process, skipping host shutdown\n"); wake_up(&lockd_exit); - flush_signals(current); - /* Exit the RPC thread */ svc_exit_thread(rqstp); diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 489670e21769..4063095d849e 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -22,7 +22,8 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *); -static void nlm4svc_callback_exit(struct rpc_task *); + +static const struct rpc_call_ops nlm4svc_callback_ops; /* * Obtain client and file from arguments @@ -470,7 +471,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, } - /* * This is the generic lockd callback for async RPC calls */ @@ -494,7 +494,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmsvc_async_call(call, proc, nlm4svc_callback_exit) < 0) + if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0) goto error; return rpc_success; @@ -504,10 +504,9 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) return rpc_system_err; } -static void -nlm4svc_callback_exit(struct rpc_task *task) +static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { - struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *call = data; if (task->tk_status < 0) { dprintk("lockd: %4d callback failed (errno = %d)\n", @@ -517,6 +516,10 @@ nlm4svc_callback_exit(struct rpc_task *task) kfree(call); } +static const struct rpc_call_ops nlm4svc_callback_ops = { + .rpc_call_done = nlm4svc_callback_exit, +}; + /* * NLM Server procedures. */ diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 49f959796b66..9cfced65d4a2 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -41,7 +41,8 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static int nlmsvc_remove_block(struct nlm_block *block); -static void nlmsvc_grant_callback(struct rpc_task *task); + +static const struct rpc_call_ops nlmsvc_grant_ops; /* * The list of blocked locks to retry @@ -226,31 +227,27 @@ failed: * It is the caller's responsibility to check whether the file * can be closed hereafter. */ -static void +static int nlmsvc_delete_block(struct nlm_block *block, int unlock) { struct file_lock *fl = &block->b_call.a_args.lock.fl; struct nlm_file *file = block->b_file; struct nlm_block **bp; + int status = 0; dprintk("lockd: deleting block %p...\n", block); /* Remove block from list */ nlmsvc_remove_block(block); - if (fl->fl_next) - posix_unblock_lock(file->f_file, fl); - if (unlock) { - fl->fl_type = F_UNLCK; - posix_lock_file(file->f_file, fl); - block->b_granted = 0; - } + if (unlock) + status = posix_unblock_lock(file->f_file, fl); /* If the block is in the middle of a GRANT callback, * don't kill it yet. */ if (block->b_incall) { nlmsvc_insert_block(block, NLM_NEVER); block->b_done = 1; - return; + return status; } /* Remove block from file's list of blocks */ @@ -265,6 +262,7 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock) nlm_release_host(block->b_host); nlmclnt_freegrantargs(&block->b_call); kfree(block); + return status; } /* @@ -275,6 +273,7 @@ int nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) { struct nlm_block *block, *next; + /* XXX: Will everything get cleaned up if we don't unlock here? */ down(&file->f_sema); for (block = file->f_blocks; block; block = next) { @@ -444,6 +443,7 @@ u32 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) { struct nlm_block *block; + int status = 0; dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, @@ -454,9 +454,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) down(&file->f_sema); if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) - nlmsvc_delete_block(block, 1); + status = nlmsvc_delete_block(block, 1); up(&file->f_sema); - return nlm_granted; + return status ? nlm_lck_denied : nlm_granted; } /* @@ -562,7 +562,7 @@ callback: /* Call the client */ nlm_get_host(block->b_call.a_host); if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, - nlmsvc_grant_callback) < 0) + &nlmsvc_grant_ops) < 0) nlm_release_host(block->b_call.a_host); up(&file->f_sema); } @@ -575,10 +575,9 @@ callback: * chain once more in order to have it removed by lockd itself (which can * then sleep on the file semaphore without disrupting e.g. the nfs client). */ -static void -nlmsvc_grant_callback(struct rpc_task *task) +static void nlmsvc_grant_callback(struct rpc_task *task, void *data) { - struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *call = data; struct nlm_block *block; unsigned long timeout; struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client); @@ -614,6 +613,10 @@ nlmsvc_grant_callback(struct rpc_task *task) nlm_release_host(call->a_host); } +static const struct rpc_call_ops nlmsvc_grant_ops = { + .rpc_call_done = nlmsvc_grant_callback, +}; + /* * We received a GRANT_RES callback. Try to find the corresponding * block. @@ -633,11 +636,12 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status file->f_count++; down(&file->f_sema); - if ((block = nlmsvc_find_block(cookie,&rqstp->rq_addr)) != NULL) { + block = nlmsvc_find_block(cookie, &rqstp->rq_addr); + if (block) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { /* Try again in a couple of seconds */ nlmsvc_insert_block(block, 10 * HZ); - block = NULL; + up(&file->f_sema); } else { /* Lock is now held by client, or has been rejected. * In both cases, the block should be removed. */ @@ -648,8 +652,6 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status nlmsvc_delete_block(block, 1); } } - if (!block) - up(&file->f_sema); nlm_release_file(file); } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 757e344cf200..3bc437e0cf5b 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -23,7 +23,8 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *); -static void nlmsvc_callback_exit(struct rpc_task *); + +static const struct rpc_call_ops nlmsvc_callback_ops; #ifdef CONFIG_LOCKD_V4 static u32 @@ -518,7 +519,7 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmsvc_async_call(call, proc, nlmsvc_callback_exit) < 0) + if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0) goto error; return rpc_success; @@ -528,10 +529,9 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) return rpc_system_err; } -static void -nlmsvc_callback_exit(struct rpc_task *task) +static void nlmsvc_callback_exit(struct rpc_task *task, void *data) { - struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *call = data; if (task->tk_status < 0) { dprintk("lockd: %4d callback failed (errno = %d)\n", @@ -541,6 +541,10 @@ nlmsvc_callback_exit(struct rpc_task *task) kfree(call); } +static const struct rpc_call_ops nlmsvc_callback_ops = { + .rpc_call_done = nlmsvc_callback_exit, +}; + /* * NLM Server procedures. */ diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index ae4d6b426c62..fdcf105a5303 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -354,7 +354,9 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp) return 0; argp->state = ntohl(*p++); /* Preserve the address in network byte order */ - argp->addr = *p++; + argp->addr = *p++; + argp->vers = *p++; + argp->proto = *p++; return xdr_argsize_check(rqstp, p); } diff --git a/fs/locks.c b/fs/locks.c index 250ef53d25ef..909eab8fb1d0 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void) } /* Free a lock which is not in use. */ -static inline void locks_free_lock(struct file_lock *fl) +static void locks_free_lock(struct file_lock *fl) { if (fl == NULL) { BUG(); @@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) /* * Check whether two locks have the same owner. */ -static inline int -posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) +static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) return fl2->fl_lmops == fl1->fl_lmops && @@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) /* Remove waiter from blocker's block list. * When blocker ends up pointing to itself then the list is empty. */ -static inline void __locks_delete_block(struct file_lock *waiter) +static void __locks_delete_block(struct file_lock *waiter) { list_del_init(&waiter->fl_block); list_del_init(&waiter->fl_link); @@ -1958,22 +1957,18 @@ EXPORT_SYMBOL(posix_block_lock); * * lockd needs to block waiting for locks. */ -void +int posix_unblock_lock(struct file *filp, struct file_lock *waiter) { - /* - * A remote machine may cancel the lock request after it's been - * granted locally. If that happens, we need to delete the lock. - */ + int status = 0; + lock_kernel(); - if (waiter->fl_next) { + if (waiter->fl_next) __locks_delete_block(waiter); - unlock_kernel(); - } else { - unlock_kernel(); - waiter->fl_type = F_UNLCK; - posix_lock_file(filp, waiter); - } + else + status = -ENOENT; + unlock_kernel(); + return status; } EXPORT_SYMBOL(posix_unblock_lock); diff --git a/fs/mpage.c b/fs/mpage.c index f1d2d02bd4c8..e431cb3878d6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, if (page_has_buffers(page)) goto confused; - block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; bh.b_page = page; @@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, * The page has no buffers: map it to disk */ BUG_ON(!PageUptodate(page)); - block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size - 1) >> blkbits; map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { diff --git a/fs/namei.c b/fs/namei.c index 6dbbd42d8b95..0a8f073435af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -438,7 +438,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s struct dentry * result; struct inode *dir = parent->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); /* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. @@ -464,7 +464,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s else result = dentry; } - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); return result; } @@ -472,7 +472,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); @@ -1366,7 +1366,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) struct dentry *p; if (p1 == p2) { - down(&p1->d_inode->i_sem); + mutex_lock(&p1->d_inode->i_mutex); return NULL; } @@ -1374,30 +1374,30 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) for (p = p1; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p2) { - down(&p2->d_inode->i_sem); - down(&p1->d_inode->i_sem); + mutex_lock(&p2->d_inode->i_mutex); + mutex_lock(&p1->d_inode->i_mutex); return p; } } for (p = p2; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p1) { - down(&p1->d_inode->i_sem); - down(&p2->d_inode->i_sem); + mutex_lock(&p1->d_inode->i_mutex); + mutex_lock(&p2->d_inode->i_mutex); return p; } } - down(&p1->d_inode->i_sem); - down(&p2->d_inode->i_sem); + mutex_lock(&p1->d_inode->i_mutex); + mutex_lock(&p2->d_inode->i_mutex); return NULL; } void unlock_rename(struct dentry *p1, struct dentry *p2) { - up(&p1->d_inode->i_sem); + mutex_unlock(&p1->d_inode->i_mutex); if (p1 != p2) { - up(&p2->d_inode->i_sem); + mutex_unlock(&p2->d_inode->i_mutex); up(&p1->d_inode->i_sb->s_vfs_rename_sem); } } @@ -1491,7 +1491,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (!error) { DQUOT_INIT(inode); - error = do_truncate(dentry, 0, NULL); + error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); } put_write_access(inode); if (error) @@ -1563,14 +1563,14 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(nd); path.mnt = nd->mnt; do_last: error = PTR_ERR(path.dentry); if (IS_ERR(path.dentry)) { - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); goto exit; } @@ -1579,7 +1579,7 @@ do_last: if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; error = vfs_create(dir->d_inode, path.dentry, mode, nd); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); dput(nd->dentry); nd->dentry = path.dentry; if (error) @@ -1593,7 +1593,7 @@ do_last: /* * It already exists. */ - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); error = -EEXIST; if (flag & O_EXCL) @@ -1665,7 +1665,7 @@ do_link: goto exit; } dir = nd->dentry; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(nd); path.mnt = nd->mnt; __putname(nd->last.name); @@ -1680,13 +1680,13 @@ do_link: * Simple function to lookup and return a dentry and create it * if it doesn't exist. Is SMP-safe. * - * Returns with nd->dentry->d_inode->i_sem locked. + * Returns with nd->dentry->d_inode->i_mutex locked. */ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry = ERR_PTR(-EEXIST); - down(&nd->dentry->d_inode->i_sem); + mutex_lock(&nd->dentry->d_inode->i_mutex); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) @@ -1784,7 +1784,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) } dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out: putname(tmp); @@ -1836,7 +1836,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode) error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out: putname(tmp); @@ -1885,7 +1885,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) DQUOT_INIT(dir); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); dentry_unhash(dentry); if (d_mountpoint(dentry)) error = -EBUSY; @@ -1897,7 +1897,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; } } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); if (!error) { d_delete(dentry); } @@ -1932,14 +1932,14 @@ asmlinkage long sys_rmdir(const char __user * pathname) error = -EBUSY; goto exit1; } - down(&nd.dentry->d_inode->i_sem); + mutex_lock(&nd.dentry->d_inode->i_mutex); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { error = vfs_rmdir(nd.dentry->d_inode, dentry); dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); exit1: path_release(&nd); exit: @@ -1959,7 +1959,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) DQUOT_INIT(dir); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; else { @@ -1967,7 +1967,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!error) error = dir->i_op->unlink(dir, dentry); } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { @@ -1979,7 +1979,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* * Make sure that the actual truncation of the file will occur outside its - * directory's i_sem. Truncate can take a long time if there is a lot of + * directory's i_mutex. Truncate can take a long time if there is a lot of * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ @@ -2001,7 +2001,7 @@ asmlinkage long sys_unlink(const char __user * pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; - down(&nd.dentry->d_inode->i_sem); + mutex_lock(&nd.dentry->d_inode->i_mutex); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -2015,7 +2015,7 @@ asmlinkage long sys_unlink(const char __user * pathname) exit2: dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ exit1: @@ -2075,7 +2075,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out: putname(to); @@ -2113,10 +2113,10 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (error) return error; - down(&old_dentry->d_inode->i_sem); + mutex_lock(&old_dentry->d_inode->i_mutex); DQUOT_INIT(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); - up(&old_dentry->d_inode->i_sem); + mutex_unlock(&old_dentry->d_inode->i_mutex); if (!error) fsnotify_create(dir, new_dentry->d_name.name); return error; @@ -2157,7 +2157,7 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); dput(new_dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); out_release: path_release(&nd); out: @@ -2178,7 +2178,7 @@ exit: * sb->s_vfs_rename_sem. We might be more accurate, but that's another * story. * c) we have to lock _three_ objects - parents and victim (if it exists). - * And that - after we got ->i_sem on parents (until then we don't know + * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change * only under ->s_vfs_rename_sem _and_ that parent of the object we @@ -2195,9 +2195,9 @@ exit: * stuff into VFS), but the former is not going away. Solution: the same * trick as in rmdir(). * e) conversion from fhandle to dentry may come in the wrong moment - when - * we are removing the target. Solution: we will have to grab ->i_sem + * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on - * ->i_sem on parents, which works but leads to some truely excessive + * ->i_mutex on parents, which works but leads to some truely excessive * locking]. */ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, @@ -2222,7 +2222,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, target = new_dentry->d_inode; if (target) { - down(&target->i_sem); + mutex_lock(&target->i_mutex); dentry_unhash(new_dentry); } if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) @@ -2232,7 +2232,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (target) { if (!error) target->i_flags |= S_DEAD; - up(&target->i_sem); + mutex_unlock(&target->i_mutex); if (d_unhashed(new_dentry)) d_rehash(new_dentry); dput(new_dentry); @@ -2255,7 +2255,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, dget(new_dentry); target = new_dentry->d_inode; if (target) - down(&target->i_sem); + mutex_lock(&target->i_mutex); if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; else @@ -2266,7 +2266,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, d_move(old_dentry, new_dentry); } if (target) - up(&target->i_sem); + mutex_unlock(&target->i_mutex); dput(new_dentry); return error; } diff --git a/fs/namespace.c b/fs/namespace.c index 2019899f2ab8..2ca6145f43d6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -355,14 +355,14 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MS_SYNCHRONOUS, ",sync" }, { MS_DIRSYNC, ",dirsync" }, { MS_MANDLOCK, ",mand" }, - { MS_NOATIME, ",noatime" }, - { MS_NODIRATIME, ",nodiratime" }, { 0, NULL } }; static struct proc_fs_info mnt_info[] = { { MNT_NOSUID, ",nosuid" }, { MNT_NODEV, ",nodev" }, { MNT_NOEXEC, ",noexec" }, + { MNT_NOATIME, ",noatime" }, + { MNT_NODIRATIME, ",nodiratime" }, { 0, NULL } }; struct proc_fs_info *fs_infop; @@ -451,7 +451,7 @@ EXPORT_SYMBOL(may_umount); void release_mounts(struct list_head *head) { struct vfsmount *mnt; - while(!list_empty(head)) { + while (!list_empty(head)) { mnt = list_entry(head->next, struct vfsmount, mnt_hash); list_del_init(&mnt->mnt_hash); if (mnt->mnt_parent != mnt) { @@ -814,7 +814,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) return -ENOTDIR; err = -ENOENT; - down(&nd->dentry->d_inode->i_sem); + mutex_lock(&nd->dentry->d_inode->i_mutex); if (IS_DEADDIR(nd->dentry->d_inode)) goto out_unlock; @@ -826,7 +826,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) err = attach_recursive_mnt(mnt, nd, NULL); out_unlock: - up(&nd->dentry->d_inode->i_sem); + mutex_unlock(&nd->dentry->d_inode->i_mutex); if (!err) security_sb_post_addmount(mnt, nd); return err; @@ -962,7 +962,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) goto out; err = -ENOENT; - down(&nd->dentry->d_inode->i_sem); + mutex_lock(&nd->dentry->d_inode->i_mutex); if (IS_DEADDIR(nd->dentry->d_inode)) goto out1; @@ -1004,7 +1004,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) list_del_init(&old_nd.mnt->mnt_expire); spin_unlock(&vfsmount_lock); out1: - up(&nd->dentry->d_inode->i_sem); + mutex_unlock(&nd->dentry->d_inode->i_mutex); out: up_write(&namespace_sem); if (!err) @@ -1286,7 +1286,13 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; - flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE); + if (flags & MS_NOATIME) + mnt_flags |= MNT_NOATIME; + if (flags & MS_NODIRATIME) + mnt_flags |= MNT_NODIRATIME; + + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | + MS_NOATIME | MS_NODIRATIME); /* ... and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); @@ -1526,6 +1532,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) * pointed to by put_old must yield the same directory as new_root. No other * file system may be mounted on put_old. After all, new_root is a mountpoint. * + * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. + * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives + * in this situation. + * * Notes: * - we don't move root/cwd if they are not at the root (reason: if something * cared enough to change them, it's probably wrong to force them elsewhere) @@ -1569,7 +1579,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, user_nd.dentry = dget(current->fs->root); read_unlock(¤t->fs->lock); down_write(&namespace_sem); - down(&old_nd.dentry->d_inode->i_sem); + mutex_lock(&old_nd.dentry->d_inode->i_mutex); error = -EINVAL; if (IS_MNT_SHARED(old_nd.mnt) || IS_MNT_SHARED(new_nd.mnt->mnt_parent) || @@ -1622,7 +1632,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, path_release(&root_parent); path_release(&parent_nd); out2: - up(&old_nd.dentry->d_inode->i_sem); + mutex_unlock(&old_nd.dentry->d_inode->i_mutex); up_write(&namespace_sem); path_release(&user_nd); path_release(&old_nd); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index a9f7a8ab1d59..cfd76f431dc0 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_child); + dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget_locked(dent); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 4947d9b11fc1..973b444d6914 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -262,7 +262,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); - inode_update_time(inode, 1); + file_update_time(file); *ppos = pos; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 9e4dc30c2435..799e5c2bec55 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); @@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); next = next->next; diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8b3bb715d177..ec61fd56a1a9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,4 +13,5 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o +nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-objs := $(nfs-y) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 30cae3602867..fcd97406a778 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -34,6 +34,7 @@ static struct nfs_callback_data nfs_callback_info; static DECLARE_MUTEX(nfs_callback_sema); static struct svc_program nfs4_callback_program; +unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; /* @@ -98,7 +99,7 @@ int nfs_callback_up(void) if (!serv) goto out_err; /* FIXME: We don't want to register this socket with the portmapper */ - ret = svc_makesock(serv, IPPROTO_TCP, 0); + ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport); if (ret < 0) goto out_destroy; if (!list_empty(&serv->sv_permsocks)) { diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index a0db2d4f9415..b252e7fe53a5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -65,6 +65,7 @@ extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); extern int nfs_callback_up(void); extern int nfs_callback_down(void); +extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 65f1e19e4d19..462cfceb50c5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -35,7 +35,9 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) goto out_iput; res->size = i_size_read(inode); - res->change_attr = NFS_CHANGE_ATTR(inode); + res->change_attr = delegation->change_attr; + if (nfsi->npages != 0) + res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 618a327027b3..c6f07c1c71e6 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -8,6 +8,7 @@ */ #include <linux/config.h> #include <linux/completion.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/spinlock.h> @@ -130,6 +131,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct sizeof(delegation->stateid.data)); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; + delegation->change_attr = nfsi->change_attr; delegation->cred = get_rpccred(cred); delegation->inode = inode; @@ -157,8 +159,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * { int res = 0; - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); nfs_free_delegation(delegation); return res; @@ -231,6 +231,49 @@ restart: spin_unlock(&clp->cl_lock); } +int nfs_do_expire_all_delegations(void *ptr) +{ + struct nfs4_client *clp = ptr; + struct nfs_delegation *delegation; + struct inode *inode; + + allow_signal(SIGKILL); +restart: + spin_lock(&clp->cl_lock); + if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) + goto out; + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) + goto out; + list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + inode = igrab(delegation->inode); + if (inode == NULL) + continue; + spin_unlock(&clp->cl_lock); + nfs_inode_return_delegation(inode); + iput(inode); + goto restart; + } +out: + spin_unlock(&clp->cl_lock); + nfs4_put_client(clp); + module_put_and_exit(0); +} + +void nfs_expire_all_delegations(struct nfs4_client *clp) +{ + struct task_struct *task; + + __module_get(THIS_MODULE); + atomic_inc(&clp->cl_count); + task = kthread_run(nfs_do_expire_all_delegations, clp, + "%u.%u.%u.%u-delegreturn", + NIPQUAD(clp->cl_addr)); + if (!IS_ERR(task)) + return; + nfs4_put_client(clp); + module_put(THIS_MODULE); +} + /* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 2fcc30de924b..7a0b2bfce771 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -21,6 +21,7 @@ struct nfs_delegation { #define NFS_DELEGATION_NEED_RECLAIM 1 long flags; loff_t maxsize; + __u64 change_attr; }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -30,6 +31,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); +void nfs_expire_all_delegations(struct nfs4_client *clp); void nfs_handle_cb_pathdown(struct nfs4_client *clp); void nfs_delegation_mark_reclaim(struct nfs4_client *clp); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c0d1a214572c..a1554bead692 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -194,7 +194,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * through inode->i_sem or some other mechanism. + * through inode->i_mutex or some other mechanism. */ if (page->index == 0) invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); @@ -573,7 +573,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { - down(&filp->f_dentry->d_inode->i_sem); + mutex_lock(&filp->f_dentry->d_inode->i_mutex); switch (origin) { case 1: offset += filp->f_pos; @@ -589,7 +589,7 @@ loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; } out: - up(&filp->f_dentry->d_inode->i_sem); + mutex_unlock(&filp->f_dentry->d_inode->i_mutex); return offset; } @@ -1001,7 +1001,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) openflags &= ~(O_CREAT|O_TRUNC); /* - * Note: we're not holding inode->i_sem and so may be racing with + * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ @@ -1051,7 +1051,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) return dentry; if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) return NULL; - /* Note: caller is already holding the dir->i_sem! */ + /* Note: caller is already holding the dir->i_mutex! */ dentry = d_alloc(parent, &name); if (dentry == NULL) return NULL; @@ -1550,8 +1550,10 @@ go_ahead: } nfs_inode_return_delegation(old_inode); - if (new_inode) + if (new_inode != NULL) { + nfs_inode_return_delegation(new_inode); d_delete(new_dentry); + } nfs_begin_data_update(old_dir); nfs_begin_data_update(new_dir); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 079228817603..10ae377e68ff 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -122,9 +122,10 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty) { int i; for (i = 0; i < npages; i++) { - if (do_dirty) - set_page_dirty_lock(pages[i]); - page_cache_release(pages[i]); + struct page *page = pages[i]; + if (do_dirty && !PageCompound(page)) + set_page_dirty_lock(page); + page_cache_release(page); } kfree(pages); } @@ -154,6 +155,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int struct list_head *list; struct nfs_direct_req *dreq; unsigned int reads = 0; + unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); if (!dreq) @@ -167,7 +169,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int list = &dreq->list; for(;;) { - struct nfs_read_data *data = nfs_readdata_alloc(); + struct nfs_read_data *data = nfs_readdata_alloc(rpages); if (unlikely(!data)) { while (!list_empty(list)) { @@ -268,8 +270,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long) inode; - data->task.tk_calldata = data; - data->task.tk_release = nfs_readdata_release; data->complete = nfs_direct_read_result; lock_kernel(); @@ -433,7 +433,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_writeverf first_verf; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(); + wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!wdata) return -ENOMEM; @@ -662,10 +662,10 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t .iov_len = count, }; - dprintk("nfs: direct read(%s/%s, %lu@%lu)\n", + dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, - (unsigned long) count, (unsigned long) pos); + (unsigned long) count, (long long) pos); if (!is_sync_kiocb(iocb)) goto out; @@ -718,9 +718,7 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { - ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; - unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + ssize_t retval; struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; @@ -728,35 +726,32 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, struct inode *inode = mapping->host; struct iovec iov = { .iov_base = (char __user *)buf, - .iov_len = count, }; - dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n", + dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, - file->f_dentry->d_name.name, inode->i_ino, - (unsigned long) count, (unsigned long) pos); + file->f_dentry->d_name.name, + (unsigned long) count, (long long) pos); + retval = -EINVAL; if (!is_sync_kiocb(iocb)) goto out; - if (count < 0) - goto out; - if (pos < 0) + + retval = generic_write_checks(file, &pos, &count, 0); + if (retval) goto out; - retval = -EFAULT; - if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + + retval = -EINVAL; + if ((ssize_t) count < 0) goto out; - retval = -EFBIG; - if (limit != RLIM_INFINITY) { - if (pos >= limit) { - send_sig(SIGXFSZ, current, 0); - goto out; - } - if (count > limit - (unsigned long) pos) - count = limit - (unsigned long) pos; - } retval = 0; if (!count) goto out; + iov.iov_len = count, + + retval = -EFAULT; + if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + goto out; retval = nfs_sync_mapping(mapping); if (retval) @@ -766,7 +761,7 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos = pos + retval; out: return retval; diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index ffb8df91dc34..821edd30333b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -54,7 +54,11 @@ #define IDMAP_HASH_SZ 128 +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600 * HZ; + struct idmap_hashent { + unsigned long ih_expires; __u32 ih_id; int ih_namelen; char ih_name[IDMAP_NAMESZ]; @@ -149,6 +153,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; return he; } @@ -164,6 +170,8 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id) struct idmap_hashent *he = idmap_id_hash(h, id); if (he->ih_id != id || he->ih_namelen == 0) return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; return he; } @@ -192,6 +200,7 @@ idmap_update_entry(struct idmap_hashent *he, const char *name, memcpy(he->ih_name, name, namelen); he->ih_name[namelen] = '\0'; he->ih_namelen = namelen; + he->ih_expires = jiffies + nfs_idmap_cache_timeout; } /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 432f41cd75e6..a77ee95b7efb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -40,6 +40,7 @@ #include <asm/uaccess.h> #include "nfs4_fs.h" +#include "callback.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -221,10 +222,10 @@ nfs_calc_block_size(u64 tsize) static inline unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) { - if (bsize < 1024) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; return nfs_block_bits(bsize, nrbitsp); } @@ -307,20 +308,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize, @@ -417,7 +413,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) clnt->cl_intr = 1; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; return clnt; @@ -575,11 +570,10 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) buf->f_namelen = server->namelen; out: unlock_kernel(); - return 0; out_err: - printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error); + dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; goto out; @@ -650,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping) if (mapping->nrpages == 0) return 0; unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_fdatawrite(mapping); - if (ret != 0) - goto out; - ret = filemap_fdatawait(mapping); + ret = filemap_write_and_wait(mapping); if (ret != 0) goto out; ret = nfs_wb_all(mapping->host); @@ -870,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) nfs_begin_data_update(inode); /* Write all dirty data if we're changing file permissions or size */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); nfs_wb_all(inode); } /* @@ -958,11 +948,22 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - if (__IS_FLG(inode, MS_NOATIME)) - need_atime = 0; - else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + /* Flush out writes to the server in order to update c/mtime */ + nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); + + /* + * We may force a getattr if the user cares about atime. + * + * Note that we only have to check the vfsmount flags here: + * - NFS always sets S_NOATIME by so checking it would give a + * bogus result + * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * no point in checking those. + */ + if ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; - /* We may force a getattr if the user cares about atime */ + if (need_atime) err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else @@ -1252,6 +1253,33 @@ void nfs_end_data_update(struct inode *inode) atomic_dec(&nfsi->data_updates); } +static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) { + nfsi->change_attr = fattr->change_attr; + nfsi->cache_change_attribute = jiffies; + } + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + nfsi->cache_change_attribute = jiffies; + } + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + nfsi->cache_change_attribute = jiffies; + } + if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { + inode->i_size = fattr->size; + nfsi->cache_change_attribute = jiffies; + } + } +} + /** * nfs_check_inode_attributes - verify consistency of the inode attribute cache * @inode - pointer to inode @@ -1268,22 +1296,20 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat int data_unstable; + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); - if (fattr->valid & NFS_ATTR_FATTR_V4) { - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 - && nfsi->change_attr == fattr->pre_change_attr) - nfsi->change_attr = fattr->change_attr; - if (nfsi->change_attr != fattr->change_attr) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } - } + /* Do atomic weak cache consistency updates */ + nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR) == 0) { - return 0; + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) { + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } /* Has the inode gone and changed behind our back? */ @@ -1295,14 +1321,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - /* If we have atomic WCC data, we may update some attributes */ - if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; @@ -1410,14 +1428,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; - if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "%s: inode number mismatch\n" - "expected (%s/0x%Lx), got (%s/0x%Lx)\n", - __FUNCTION__, - inode->i_sb->s_id, (long long)nfsi->fileid, - inode->i_sb->s_id, (long long)fattr->fileid); - goto out_err; - } + if (nfsi->fileid != fattr->fileid) + goto out_fileid; /* * Make sure the inode's type hasn't changed. @@ -1436,6 +1448,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (data_stable) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do atomic weak cache consistency updates */ + nfs_wcc_update_inode(inode, fattr); + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); @@ -1539,6 +1554,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfs_invalidate_inode(inode); return -ESTALE; + + out_fileid: + printk(KERN_ERR "NFS: server %s error: fileid changed\n" + "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", + NFS_SERVER(inode)->hostname, inode->i_sb->s_id, + (long long)nfsi->fileid, (long long)fattr->fileid); + goto out_err; } /* @@ -1820,25 +1842,10 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } clnt->cl_intr = 1; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clp->cl_rpcclient = clnt; - clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); - if (IS_ERR(clp->cl_cred)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clp->cl_cred); - clp->cl_cred = NULL; - goto out_fail; - } memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); } - if (list_empty(&clp->cl_superblocks)) { - err = nfs4_init_client(clp); - if (err != 0) { - up_write(&clp->cl_sem); - goto out_fail; - } - } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) @@ -2033,6 +2040,35 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); + +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -2040,8 +2076,25 @@ static struct file_system_type nfs4_fs_type = { nfsi->delegation_state = 0; \ init_rwsem(&nfsi->rwsem); \ } while(0) -#define register_nfs4fs() register_filesystem(&nfs4_fs_type) -#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) + +static inline int register_nfs4fs(void) +{ + int ret; + + ret = nfs_register_sysctl(); + if (ret != 0) + return ret; + ret = register_filesystem(&nfs4_fs_type); + if (ret != 0) + nfs_unregister_sysctl(); + return ret; +} + +static inline void unregister_nfs4fs(void) +{ + unregister_filesystem(&nfs4_fs_type); + nfs_unregister_sysctl(); +} #else #define nfs4_init_once(nfsi) \ do { } while (0) @@ -2166,11 +2219,11 @@ out: #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif - nfs_destroy_writepagecache(); #ifdef CONFIG_NFS_DIRECTIO -out0: nfs_destroy_directcache(); +out0: #endif + nfs_destroy_writepagecache(); out1: nfs_destroy_readpagecache(); out2: diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 0e82617f2de0..db99b8f678f8 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -82,7 +82,6 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, RPC_AUTH_UNIX); if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clnt->cl_oneshot = 1; clnt->cl_intr = 1; } diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 59049e864ca7..7fc0560c89c9 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -146,23 +146,23 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) return p; } -#define SATTR(p, attr, flag, field) \ - *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0 static inline u32 * xdr_encode_sattr(u32 *p, struct iattr *attr) { - SATTR(p, attr, ATTR_MODE, ia_mode); - SATTR(p, attr, ATTR_UID, ia_uid); - SATTR(p, attr, ATTR_GID, ia_gid); - SATTR(p, attr, ATTR_SIZE, ia_size); + const u32 not_set = __constant_htonl(0xFFFFFFFF); + + *p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set; + *p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set; + *p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set; + *p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set; if (attr->ia_valid & ATTR_ATIME_SET) { p = xdr_encode_time(p, &attr->ia_atime); } else if (attr->ia_valid & ATTR_ATIME) { p = xdr_encode_current_server_time(p, &attr->ia_atime); } else { - *p++ = ~(u32) 0; - *p++ = ~(u32) 0; + *p++ = not_set; + *p++ = not_set; } if (attr->ia_valid & ATTR_MTIME_SET) { @@ -170,12 +170,11 @@ xdr_encode_sattr(u32 *p, struct iattr *attr) } else if (attr->ia_valid & ATTR_MTIME) { p = xdr_encode_current_server_time(p, &attr->ia_mtime); } else { - *p++ = ~(u32) 0; - *p++ = ~(u32) 0; + *p++ = not_set; + *p++ = not_set; } return p; } -#undef SATTR /* * NFS encode functions diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 92c870d19ccd..ed67567f0556 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -68,27 +68,39 @@ nfs3_async_handle_jukebox(struct rpc_task *task) return 1; } -/* - * Bare-bones access to getattr: this is for nfs_read_super. - */ static int -nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) +do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) { int status; dprintk("%s: call fsinfo\n", __FUNCTION__); nfs_fattr_init(info->fattr); - status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { - status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; } /* + * Bare-bones access to getattr: this is for nfs_read_super. + */ +static int +nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + status = do_proc_get_root(server->client, fhandle, info); + if (status && server->client_sys != server->client) + status = do_proc_get_root(server->client_sys, fhandle, info); + return status; +} + +/* * One function for each procedure in the NFS protocol. */ static int @@ -732,19 +744,23 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); -static void -nfs3_read_done(struct rpc_task *task) +static void nfs3_read_done(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct nfs_read_data *data = calldata; if (nfs3_async_handle_jukebox(task)) return; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) nfs_refresh_inode(data->inode, &data->fattr); - nfs_readpage_result(task); + nfs_readpage_result(task, calldata); } +static const struct rpc_call_ops nfs3_read_ops = { + .rpc_call_done = nfs3_read_done, + .rpc_release = nfs_readdata_release, +}; + static void nfs3_proc_read_setup(struct nfs_read_data *data) { @@ -762,23 +778,26 @@ nfs3_proc_read_setup(struct nfs_read_data *data) flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs3_write_done(struct rpc_task *task) +static void nfs3_write_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data; + struct nfs_write_data *data = calldata; if (nfs3_async_handle_jukebox(task)) return; - data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task); + nfs_writeback_done(task, calldata); } +static const struct rpc_call_ops nfs3_write_ops = { + .rpc_call_done = nfs3_write_done, + .rpc_release = nfs_writedata_release, +}; + static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) { @@ -806,23 +825,26 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs3_commit_done(struct rpc_task *task) +static void nfs3_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data; + struct nfs_write_data *data = calldata; if (nfs3_async_handle_jukebox(task)) return; - data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_commit_done(task); + nfs_commit_done(task, calldata); } +static const struct rpc_call_ops nfs3_commit_ops = { + .rpc_call_done = nfs3_commit_done, + .rpc_release = nfs_commit_release, +}; + static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { @@ -840,7 +862,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data); rpc_call_setup(task, &msg, 0); } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 0498bd36602c..b6c0b5012bce 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -182,7 +182,7 @@ xdr_encode_sattr(u32 *p, struct iattr *attr) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; - *p++ = htonl(attr->ia_mode); + *p++ = htonl(attr->ia_mode & S_IALLUGO); } else { *p++ = xdr_zero; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b7f262dcb6e3..0f5e4e7cddec 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -38,7 +38,8 @@ struct idmap; ((err) != NFSERR_NOFILEHANDLE)) enum nfs4_client_state { - NFS4CLNT_OK = 0, + NFS4CLNT_STATE_RECOVER = 0, + NFS4CLNT_LEASE_EXPIRED, }; /* @@ -67,7 +68,6 @@ struct nfs4_client { atomic_t cl_count; struct rpc_clnt * cl_rpcclient; - struct rpc_cred * cl_cred; struct list_head cl_superblocks; /* List of nfs_server structs */ @@ -76,7 +76,6 @@ struct nfs4_client { struct work_struct cl_renewd; struct work_struct cl_recoverd; - wait_queue_head_t cl_waitq; struct rpc_wait_queue cl_rpcwaitq; /* used for the setclientid verifier */ @@ -182,8 +181,9 @@ struct nfs4_state { nfs4_stateid stateid; - unsigned int nreaders; - unsigned int nwriters; + unsigned int n_rdonly; + unsigned int n_wronly; + unsigned int n_rdwr; int state; /* State on the server (R,W, or RW) */ atomic_t count; }; @@ -210,10 +210,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); -extern int nfs4_proc_async_renew(struct nfs4_client *); -extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); +extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); +extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -237,8 +237,8 @@ extern void init_nfsv4_state(struct nfs_server *); extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); -extern int nfs4_init_client(struct nfs4_client *clp); extern struct nfs4_client *nfs4_find_client(struct in_addr *); +struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp); extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f988a9417b13..984ca3454d04 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -57,11 +57,13 @@ #define NFS4_POLL_RETRY_MIN (1*HZ) #define NFS4_POLL_RETRY_MAX (15*HZ) -static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); +struct nfs4_opendata; +static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -173,8 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, kunmap_atomic(start, KM_USER0); } -static void -renew_lease(struct nfs_server *server, unsigned long timestamp) +static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { struct nfs4_client *clp = server->nfs4_state; spin_lock(&clp->cl_lock); @@ -194,21 +195,123 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf spin_unlock(&inode->i_lock); } +struct nfs4_opendata { + atomic_t count; + struct nfs_openargs o_arg; + struct nfs_openres o_res; + struct nfs_open_confirmargs c_arg; + struct nfs_open_confirmres c_res; + struct nfs_fattr f_attr; + struct nfs_fattr dir_attr; + struct dentry *dentry; + struct dentry *dir; + struct nfs4_state_owner *owner; + struct iattr attrs; + unsigned long timestamp; + int rpc_status; + int cancelled; +}; + +static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, + struct nfs4_state_owner *sp, int flags, + const struct iattr *attrs) +{ + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_opendata *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + goto err; + p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (p->o_arg.seqid == NULL) + goto err_free; + atomic_set(&p->count, 1); + p->dentry = dget(dentry); + p->dir = parent; + p->owner = sp; + atomic_inc(&sp->so_count); + p->o_arg.fh = NFS_FH(dir); + p->o_arg.open_flags = flags, + p->o_arg.clientid = server->nfs4_state->cl_clientid; + p->o_arg.id = sp->so_id; + p->o_arg.name = &dentry->d_name; + p->o_arg.server = server; + p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; + p->o_res.f_attr = &p->f_attr; + p->o_res.dir_attr = &p->dir_attr; + p->o_res.server = server; + nfs_fattr_init(&p->f_attr); + nfs_fattr_init(&p->dir_attr); + if (flags & O_EXCL) { + u32 *s = (u32 *) p->o_arg.u.verifier.data; + s[0] = jiffies; + s[1] = current->pid; + } else if (flags & O_CREAT) { + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, attrs, sizeof(p->attrs)); + } + p->c_arg.fh = &p->o_res.fh; + p->c_arg.stateid = &p->o_res.stateid; + p->c_arg.seqid = p->o_arg.seqid; + return p; +err_free: + kfree(p); +err: + dput(parent); + return NULL; +} + +static void nfs4_opendata_free(struct nfs4_opendata *p) +{ + if (p != NULL && atomic_dec_and_test(&p->count)) { + nfs_free_seqid(p->o_arg.seqid); + nfs4_put_state_owner(p->owner); + dput(p->dir); + dput(p->dentry); + kfree(p); + } +} + /* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, - rpc_action tk_exit, void *calldata) +static int nfs4_call_async(struct rpc_clnt *clnt, + const struct rpc_call_ops *tk_ops, void *calldata) { struct rpc_task *task; - if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) + if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) return -ENOMEM; - - task->tk_calldata = calldata; - task->tk_action = tk_begin; rpc_execute(task); return 0; } +static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) +{ + sigset_t oldset; + int ret; + + rpc_clnt_sigmask(task->tk_client, &oldset); + ret = rpc_wait_for_completion_task(task); + rpc_clnt_sigunmask(task->tk_client, &oldset); + return ret; +} + +static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) +{ + switch (open_flags) { + case FMODE_WRITE: + state->n_wronly++; + break; + case FMODE_READ: + state->n_rdonly++; + break; + case FMODE_READ|FMODE_WRITE: + state->n_rdwr++; + } +} + static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { struct inode *inode = state->inode; @@ -218,41 +321,134 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, spin_lock(&state->owner->so_lock); spin_lock(&inode->i_lock); memcpy(&state->stateid, stateid, sizeof(state->stateid)); - if ((open_flags & FMODE_WRITE)) - state->nwriters++; - if (open_flags & FMODE_READ) - state->nreaders++; + update_open_stateflags(state, open_flags); nfs4_state_set_mode_locked(state, state->state | open_flags); spin_unlock(&inode->i_lock); spin_unlock(&state->owner->so_lock); } +static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +{ + struct inode *inode; + struct nfs4_state *state = NULL; + + if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + goto out; + inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); + if (inode == NULL) + goto out; + state = nfs4_get_open_state(inode, data->owner); + if (state == NULL) + goto put_inode; + update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); +put_inode: + iput(inode); +out: + return state; +} + +static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_open_context *ctx; + + spin_lock(&state->inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + get_nfs_open_context(ctx); + spin_unlock(&state->inode->i_lock); + return ctx; + } + spin_unlock(&state->inode->i_lock); + return ERR_PTR(-ENOENT); +} + +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) +{ + int ret; + + opendata->o_arg.open_flags = openflags; + ret = _nfs4_proc_open(opendata); + if (ret != 0) + return ret; + memcpy(stateid->data, opendata->o_res.stateid.data, + sizeof(stateid->data)); + return 0; +} + +static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) +{ + nfs4_stateid stateid; + struct nfs4_state *newstate; + int mode = 0; + int delegation = 0; + int ret; + + /* memory barrier prior to reading state->n_* */ + smp_rmb(); + if (state->n_rdwr != 0) { + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); + if (ret != 0) + return ret; + mode |= FMODE_READ|FMODE_WRITE; + if (opendata->o_res.delegation_type != 0) + delegation = opendata->o_res.delegation_type; + smp_rmb(); + } + if (state->n_wronly != 0) { + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); + if (ret != 0) + return ret; + mode |= FMODE_WRITE; + if (opendata->o_res.delegation_type != 0) + delegation = opendata->o_res.delegation_type; + smp_rmb(); + } + if (state->n_rdonly != 0) { + ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); + if (ret != 0) + return ret; + mode |= FMODE_READ; + } + clear_bit(NFS_DELEGATED_STATE, &state->flags); + if (mode == 0) + return 0; + if (opendata->o_res.delegation_type == 0) + opendata->o_res.delegation_type = delegation; + opendata->o_arg.open_flags |= mode; + newstate = nfs4_opendata_to_nfs4_state(opendata); + if (newstate != NULL) { + if (opendata->o_res.delegation_type != 0) { + struct nfs_inode *nfsi = NFS_I(newstate->inode); + int delegation_flags = 0; + if (nfsi->delegation) + delegation_flags = nfsi->delegation->flags; + if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(newstate->inode, + opendata->owner->so_cred, + &opendata->o_res); + else + nfs_inode_reclaim_delegation(newstate->inode, + opendata->owner->so_cred, + &opendata->o_res); + } + nfs4_close_state(newstate, opendata->o_arg.open_flags); + } + if (newstate != state) + return -ESTALE; + return 0; +} + /* * OPEN_RECLAIM: * reclaim state on the server after a reboot. */ -static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) { - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct nfs_openargs o_arg = { - .fh = NFS_FH(inode), - .id = sp->so_id, - .open_flags = state->state, - .clientid = server->nfs4_state->cl_clientid, - .claim = NFS4_OPEN_CLAIM_PREVIOUS, - .bitmask = server->attr_bitmask, - }; - struct nfs_openres o_res = { - .server = server, /* Grrr */ - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], - .rpc_argp = &o_arg, - .rpc_resp = &o_res, - .rpc_cred = sp->so_cred, - }; + struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; + struct nfs4_opendata *opendata; + int delegation_type = 0; int status; if (delegation != NULL) { @@ -262,38 +458,27 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st set_bit(NFS_DELEGATED_STATE, &state->flags); return 0; } - o_arg.u.delegation_type = delegation->type; + delegation_type = delegation->type; } - o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); - if (o_arg.seqid == NULL) + opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + if (opendata == NULL) return -ENOMEM; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* Confirm the sequence as being established */ - nfs_confirm_seqid(&sp->so_seqid, status); - nfs_increment_open_seqid(status, o_arg.seqid); - if (status == 0) { - memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); - if (o_res.delegation_type != 0) { - nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); - /* Did the server issue an immediate delegation recall? */ - if (o_res.do_recall) - nfs_async_inode_return_delegation(inode, &o_res.stateid); - } - } - nfs_free_seqid(o_arg.seqid); - clear_bit(NFS_DELEGATED_STATE, &state->flags); - /* Ensure we update the inode attributes */ - NFS_CACHEINV(inode); + opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; + opendata->o_arg.fh = NFS_FH(state->inode); + nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); + opendata->o_arg.u.delegation_type = delegation_type; + status = nfs4_open_recover(opendata, state); + nfs4_opendata_free(opendata); return status; } -static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_open_reclaim(sp, state); + err = _nfs4_do_open_reclaim(sp, state, dentry); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -301,63 +486,36 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta return err; } +static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_open_context *ctx; + int ret; + + ctx = nfs4_state_find_open_context(state); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); + put_nfs_open_context(ctx); + return ret; +} + static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) { struct nfs4_state_owner *sp = state->owner; - struct inode *inode = dentry->d_inode; - struct nfs_server *server = NFS_SERVER(inode); - struct dentry *parent = dget_parent(dentry); - struct nfs_openargs arg = { - .fh = NFS_FH(parent->d_inode), - .clientid = server->nfs4_state->cl_clientid, - .name = &dentry->d_name, - .id = sp->so_id, - .server = server, - .bitmask = server->attr_bitmask, - .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR, - }; - struct nfs_openres res = { - .server = server, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = sp->so_cred, - }; - int status = 0; + struct nfs4_opendata *opendata; + int ret; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) - goto out; - if (state->state == 0) - goto out; - arg.seqid = nfs_alloc_seqid(&sp->so_seqid); - status = -ENOMEM; - if (arg.seqid == NULL) - goto out; - arg.open_flags = state->state; - memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs_increment_open_seqid(status, arg.seqid); - if (status != 0) - goto out_free; - if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) { - status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode), - sp, &res.stateid, arg.seqid); - if (status != 0) - goto out_free; - } - nfs_confirm_seqid(&sp->so_seqid, 0); - if (status >= 0) { - memcpy(state->stateid.data, res.stateid.data, - sizeof(state->stateid.data)); - clear_bit(NFS_DELEGATED_STATE, &state->flags); - } -out_free: - nfs_free_seqid(arg.seqid); -out: - dput(parent); - return status; + return 0; + opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + if (opendata == NULL) + return -ENOMEM; + opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; + memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, + sizeof(opendata->o_arg.u.delegation.data)); + ret = nfs4_open_recover(opendata, state); + nfs4_opendata_free(opendata); + return ret; } int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) @@ -382,82 +540,202 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) +static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { - struct nfs_open_confirmargs arg = { - .fh = fh, - .seqid = seqid, - .stateid = *stateid, - }; - struct nfs_open_confirmres res; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = sp->so_cred, + struct nfs4_opendata *data = calldata; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &data->c_arg, + .rpc_resp = &data->c_res, + .rpc_cred = data->owner->so_cred, }; + data->timestamp = jiffies; + rpc_call_setup(task, &msg, 0); +} + +static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + + data->rpc_status = task->tk_status; + if (RPC_ASSASSINATED(task)) + return; + if (data->rpc_status == 0) { + memcpy(data->o_res.stateid.data, data->c_res.stateid.data, + sizeof(data->o_res.stateid.data)); + renew_lease(data->o_res.server, data->timestamp); + } + nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); + nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); +} + +static void nfs4_open_confirm_release(void *calldata) +{ + struct nfs4_opendata *data = calldata; + struct nfs4_state *state = NULL; + + /* If this request hasn't been cancelled, do nothing */ + if (data->cancelled == 0) + goto out_free; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0) + goto out_free; + nfs_confirm_seqid(&data->owner->so_seqid, 0); + state = nfs4_opendata_to_nfs4_state(data); + if (state != NULL) + nfs4_close_state(state, data->o_arg.open_flags); +out_free: + nfs4_opendata_free(data); +} + +static const struct rpc_call_ops nfs4_open_confirm_ops = { + .rpc_call_prepare = nfs4_open_confirm_prepare, + .rpc_call_done = nfs4_open_confirm_done, + .rpc_release = nfs4_open_confirm_release, +}; + +/* + * Note: On error, nfs4_proc_open_confirm will free the struct nfs4_opendata + */ +static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) +{ + struct nfs_server *server = NFS_SERVER(data->dir->d_inode); + struct rpc_task *task; int status; - status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); - /* Confirm the sequence as being established */ - nfs_confirm_seqid(&sp->so_seqid, status); - nfs_increment_open_seqid(status, seqid); - if (status >= 0) - memcpy(stateid, &res.stateid, sizeof(*stateid)); + atomic_inc(&data->count); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); + if (IS_ERR(task)) { + nfs4_opendata_free(data); + return PTR_ERR(task); + } + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_release_task(task); return status; } -static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res) +static void nfs4_open_prepare(struct rpc_task *task, void *calldata) { - struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_opendata *data = calldata; + struct nfs4_state_owner *sp = data->owner; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], - .rpc_argp = o_arg, - .rpc_resp = o_res, + .rpc_argp = &data->o_arg, + .rpc_resp = &data->o_res, .rpc_cred = sp->so_cred, }; - int status; + + if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) + return; + /* Update sequence id. */ + data->o_arg.id = sp->so_id; + data->o_arg.clientid = sp->so_client->cl_clientid; + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + data->timestamp = jiffies; + rpc_call_setup(task, &msg, 0); +} - /* Update sequence id. The caller must serialize! */ - o_arg->id = sp->so_id; - o_arg->clientid = sp->so_client->cl_clientid; +static void nfs4_open_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - if (status == 0) { - /* OPEN on anything except a regular file is disallowed in NFSv4 */ - switch (o_res->f_attr->mode & S_IFMT) { + data->rpc_status = task->tk_status; + if (RPC_ASSASSINATED(task)) + return; + if (task->tk_status == 0) { + switch (data->o_res.f_attr->mode & S_IFMT) { case S_IFREG: break; case S_IFLNK: - status = -ELOOP; + data->rpc_status = -ELOOP; break; case S_IFDIR: - status = -EISDIR; + data->rpc_status = -EISDIR; break; default: - status = -ENOTDIR; + data->rpc_status = -ENOTDIR; } + renew_lease(data->o_res.server, data->timestamp); } + nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); +} + +static void nfs4_open_release(void *calldata) +{ + struct nfs4_opendata *data = calldata; + struct nfs4_state *state = NULL; - nfs_increment_open_seqid(status, o_arg->seqid); + /* If this request hasn't been cancelled, do nothing */ + if (data->cancelled == 0) + goto out_free; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0) + goto out_free; + /* In case we need an open_confirm, no cleanup! */ + if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) + goto out_free; + nfs_confirm_seqid(&data->owner->so_seqid, 0); + state = nfs4_opendata_to_nfs4_state(data); + if (state != NULL) + nfs4_close_state(state, data->o_arg.open_flags); +out_free: + nfs4_opendata_free(data); +} + +static const struct rpc_call_ops nfs4_open_ops = { + .rpc_call_prepare = nfs4_open_prepare, + .rpc_call_done = nfs4_open_done, + .rpc_release = nfs4_open_release, +}; + +/* + * Note: On error, nfs4_proc_open will free the struct nfs4_opendata + */ +static int _nfs4_proc_open(struct nfs4_opendata *data) +{ + struct inode *dir = data->dir->d_inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_openargs *o_arg = &data->o_arg; + struct nfs_openres *o_res = &data->o_res; + struct rpc_task *task; + int status; + + atomic_inc(&data->count); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); + if (IS_ERR(task)) { + nfs4_opendata_free(data); + return PTR_ERR(task); + } + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_release_task(task); if (status != 0) - goto out; + return status; + if (o_arg->open_flags & O_CREAT) { update_changeattr(dir, &o_res->cinfo); nfs_post_op_update_inode(dir, o_res->dir_attr); } else nfs_refresh_inode(dir, o_res->dir_attr); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { - status = _nfs4_proc_open_confirm(server->client, &o_res->fh, - sp, &o_res->stateid, o_arg->seqid); + status = _nfs4_proc_open_confirm(data); if (status != 0) - goto out; + return status; } - nfs_confirm_seqid(&sp->so_seqid, 0); + nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); -out: - return status; + return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); + return 0; } static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) @@ -488,6 +766,15 @@ out: return -EACCES; } +int nfs4_recover_expired_lease(struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + nfs4_schedule_state_recovery(clp); + return nfs4_wait_clnt_recover(server->client, clp); +} + /* * OPEN_EXPIRED: * reclaim state on the server after a network partition. @@ -495,77 +782,31 @@ out: */ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) { - struct dentry *parent = dget_parent(dentry); - struct inode *dir = parent->d_inode; struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(dir); struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct nfs_fattr f_attr, dir_attr; - struct nfs_openargs o_arg = { - .fh = NFS_FH(dir), - .open_flags = state->state, - .name = &dentry->d_name, - .bitmask = server->attr_bitmask, - .claim = NFS4_OPEN_CLAIM_NULL, - }; - struct nfs_openres o_res = { - .f_attr = &f_attr, - .dir_attr = &dir_attr, - .server = server, - }; - int status = 0; + struct nfs4_opendata *opendata; + int openflags = state->state & (FMODE_READ|FMODE_WRITE); + int ret; if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - status = _nfs4_do_access(inode, sp->so_cred, state->state); - if (status < 0) - goto out; + ret = _nfs4_do_access(inode, sp->so_cred, openflags); + if (ret < 0) + return ret; memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); set_bit(NFS_DELEGATED_STATE, &state->flags); - goto out; + return 0; } - o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); - status = -ENOMEM; - if (o_arg.seqid == NULL) - goto out; - nfs_fattr_init(&f_attr); - nfs_fattr_init(&dir_attr); - status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); - if (status != 0) - goto out_nodeleg; - /* Check if files differ */ - if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT)) - goto out_stale; - /* Has the file handle changed? */ - if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) { - /* Verify if the change attributes are the same */ - if (f_attr.change_attr != NFS_I(inode)->change_attr) - goto out_stale; - if (nfs_size_to_loff_t(f_attr.size) != inode->i_size) - goto out_stale; - /* Lets just pretend that this is the same file */ - nfs_copy_fh(NFS_FH(inode), &o_res.fh); - NFS_I(inode)->fileid = f_attr.fileid; - } - memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); - if (o_res.delegation_type != 0) { - if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) - nfs_inode_set_delegation(inode, sp->so_cred, &o_res); - else - nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL); + if (opendata == NULL) + return -ENOMEM; + ret = nfs4_open_recover(opendata, state); + if (ret == -ESTALE) { + /* Invalidate the state owner so we don't ever use it again */ + nfs4_drop_state_owner(sp); + d_drop(dentry); } -out_nodeleg: - nfs_free_seqid(o_arg.seqid); - clear_bit(NFS_DELEGATED_STATE, &state->flags); -out: - dput(parent); - return status; -out_stale: - status = -ESTALE; - /* Invalidate the state owner so we don't ever use it again */ - nfs4_drop_state_owner(sp); - d_drop(dentry); - /* Should we be trying to close that stateid? */ - goto out_nodeleg; + nfs4_opendata_free(opendata); + return ret; } static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) @@ -584,26 +825,19 @@ static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_open_context *ctx; - int status; + int ret; - spin_lock(&state->inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { - if (ctx->state != state) - continue; - get_nfs_open_context(ctx); - spin_unlock(&state->inode->i_lock); - status = nfs4_do_open_expired(sp, state, ctx->dentry); - put_nfs_open_context(ctx); - return status; - } - spin_unlock(&state->inode->i_lock); - return -ENOENT; + ctx = nfs4_state_find_open_context(state); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + ret = nfs4_do_open_expired(sp, state, ctx->dentry); + put_nfs_open_context(ctx); + return ret; } /* - * Returns an nfs4_state + an extra reference to the inode + * Returns a referenced nfs4_state if there is an open delegation on the file */ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) { @@ -616,6 +850,14 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred int open_flags = flags & (FMODE_READ|FMODE_WRITE); int err; + err = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); + return err; + } + err = nfs4_recover_expired_lease(server); + if (err != 0) + goto out_put_state_owner; /* Protect against reboot recovery - NOTE ORDER! */ down_read(&clp->cl_sem); /* Protect against delegation recall */ @@ -625,10 +867,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred if (delegation == NULL || (delegation->type & open_flags) != open_flags) goto out_err; err = -ENOMEM; - if (!(sp = nfs4_get_state_owner(server, cred))) { - dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); - goto out_err; - } state = nfs4_get_open_state(inode, sp); if (state == NULL) goto out_err; @@ -636,39 +874,34 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred err = -ENOENT; if ((state->state & open_flags) == open_flags) { spin_lock(&inode->i_lock); - if (open_flags & FMODE_READ) - state->nreaders++; - if (open_flags & FMODE_WRITE) - state->nwriters++; + update_open_stateflags(state, open_flags); spin_unlock(&inode->i_lock); goto out_ok; } else if (state->state != 0) - goto out_err; + goto out_put_open_state; lock_kernel(); err = _nfs4_do_access(inode, cred, open_flags); unlock_kernel(); if (err != 0) - goto out_err; + goto out_put_open_state; set_bit(NFS_DELEGATED_STATE, &state->flags); update_open_stateid(state, &delegation->stateid, open_flags); out_ok: nfs4_put_state_owner(sp); up_read(&nfsi->rwsem); up_read(&clp->cl_sem); - igrab(inode); *res = state; - return 0; + return 0; +out_put_open_state: + nfs4_put_open_state(state); out_err: - if (sp != NULL) { - if (state != NULL) - nfs4_put_open_state(state); - nfs4_put_state_owner(sp); - } up_read(&nfsi->rwsem); up_read(&clp->cl_sem); if (err != -EACCES) nfs_inode_return_delegation(inode); +out_put_state_owner: + nfs4_put_state_owner(sp); return err; } @@ -689,7 +922,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st } /* - * Returns an nfs4_state + an referenced inode + * Returns a referenced nfs4_state */ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { @@ -697,73 +930,46 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_client *clp = server->nfs4_state; - struct inode *inode = NULL; + struct nfs4_opendata *opendata; int status; - struct nfs_fattr f_attr, dir_attr; - struct nfs_openargs o_arg = { - .fh = NFS_FH(dir), - .open_flags = flags, - .name = &dentry->d_name, - .server = server, - .bitmask = server->attr_bitmask, - .claim = NFS4_OPEN_CLAIM_NULL, - }; - struct nfs_openres o_res = { - .f_attr = &f_attr, - .dir_attr = &dir_attr, - .server = server, - }; /* Protect against reboot recovery conflicts */ - down_read(&clp->cl_sem); status = -ENOMEM; if (!(sp = nfs4_get_state_owner(server, cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } - if (flags & O_EXCL) { - u32 *p = (u32 *) o_arg.u.verifier.data; - p[0] = jiffies; - p[1] = current->pid; - } else - o_arg.u.attrs = sattr; - /* Serialization for the sequence id */ + status = nfs4_recover_expired_lease(server); + if (status != 0) + goto err_put_state_owner; + down_read(&clp->cl_sem); + status = -ENOMEM; + opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); + if (opendata == NULL) + goto err_put_state_owner; - o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); - if (o_arg.seqid == NULL) - return -ENOMEM; - nfs_fattr_init(&f_attr); - nfs_fattr_init(&dir_attr); - status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + status = _nfs4_proc_open(opendata); if (status != 0) - goto out_err; + goto err_opendata_free; status = -ENOMEM; - inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr); - if (!inode) - goto out_err; - state = nfs4_get_open_state(inode, sp); - if (!state) - goto out_err; - update_open_stateid(state, &o_res.stateid, flags); - if (o_res.delegation_type != 0) - nfs_inode_set_delegation(inode, cred, &o_res); - nfs_free_seqid(o_arg.seqid); + state = nfs4_opendata_to_nfs4_state(opendata); + if (state == NULL) + goto err_opendata_free; + if (opendata->o_res.delegation_type != 0) + nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); + nfs4_opendata_free(opendata); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; return 0; +err_opendata_free: + nfs4_opendata_free(opendata); +err_put_state_owner: + nfs4_put_state_owner(sp); out_err: - if (sp != NULL) { - if (state != NULL) - nfs4_put_open_state(state); - nfs_free_seqid(o_arg.seqid); - nfs4_put_state_owner(sp); - } /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ up_read(&clp->cl_sem); - if (inode != NULL) - iput(inode); *res = NULL; return status; } @@ -830,6 +1036,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, .rpc_argp = &arg, .rpc_resp = &res, }; + unsigned long timestamp = jiffies; int status; nfs_fattr_init(fattr); @@ -841,6 +1048,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); status = rpc_call_sync(server->client, &msg, 0); + if (status == 0 && state != NULL) + renew_lease(server, timestamp); return status; } @@ -865,12 +1074,13 @@ struct nfs4_closedata { struct nfs_closeargs arg; struct nfs_closeres res; struct nfs_fattr fattr; + unsigned long timestamp; }; -static void nfs4_free_closedata(struct nfs4_closedata *calldata) +static void nfs4_free_closedata(void *data) { - struct nfs4_state *state = calldata->state; - struct nfs4_state_owner *sp = state->owner; + struct nfs4_closedata *calldata = data; + struct nfs4_state_owner *sp = calldata->state->owner; nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); @@ -878,12 +1088,14 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata) kfree(calldata); } -static void nfs4_close_done(struct rpc_task *task) +static void nfs4_close_done(struct rpc_task *task, void *data) { - struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + if (RPC_ASSASSINATED(task)) + return; /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ @@ -892,6 +1104,7 @@ static void nfs4_close_done(struct rpc_task *task) case 0: memcpy(&state->stateid, &calldata->res.stateid, sizeof(state->stateid)); + renew_lease(server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -904,12 +1117,11 @@ static void nfs4_close_done(struct rpc_task *task) } } nfs_refresh_inode(calldata->inode, calldata->res.fattr); - nfs4_free_closedata(calldata); } -static void nfs4_close_begin(struct rpc_task *task) +static void nfs4_close_prepare(struct rpc_task *task, void *data) { - struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], @@ -918,10 +1130,8 @@ static void nfs4_close_begin(struct rpc_task *task) .rpc_cred = state->owner->so_cred, }; int mode = 0, old_mode; - int status; - status = nfs_wait_on_sequence(calldata->arg.seqid, task); - if (status != 0) + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; /* Recalculate the new open mode in case someone reopened the file * while we were waiting in line to be scheduled. @@ -929,26 +1139,34 @@ static void nfs4_close_begin(struct rpc_task *task) spin_lock(&state->owner->so_lock); spin_lock(&calldata->inode->i_lock); mode = old_mode = state->state; - if (state->nreaders == 0) - mode &= ~FMODE_READ; - if (state->nwriters == 0) - mode &= ~FMODE_WRITE; + if (state->n_rdwr == 0) { + if (state->n_rdonly == 0) + mode &= ~FMODE_READ; + if (state->n_wronly == 0) + mode &= ~FMODE_WRITE; + } nfs4_state_set_mode_locked(state, mode); spin_unlock(&calldata->inode->i_lock); spin_unlock(&state->owner->so_lock); if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { - nfs4_free_closedata(calldata); - task->tk_exit = NULL; - rpc_exit(task, 0); + /* Note: exit _without_ calling nfs4_close_done */ + task->tk_action = NULL; return; } nfs_fattr_init(calldata->res.fattr); if (mode != 0) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; calldata->arg.open_flags = mode; + calldata->timestamp = jiffies; rpc_call_setup(task, &msg, 0); } +static const struct rpc_call_ops nfs4_close_ops = { + .rpc_call_prepare = nfs4_close_prepare, + .rpc_call_done = nfs4_close_done, + .rpc_release = nfs4_free_closedata, +}; + /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). @@ -981,8 +1199,7 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state) calldata->res.fattr = &calldata->fattr; calldata->res.server = server; - status = nfs4_call_async(server->client, nfs4_close_begin, - nfs4_close_done, calldata); + status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); if (status == 0) goto out; @@ -1034,7 +1251,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) d_add(dentry, NULL); return (struct dentry *)state; } - res = d_add_unique(dentry, state->inode); + res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) dentry = res; nfs4_intent_set_file(nd, dentry, state); @@ -1046,7 +1263,6 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st { struct rpc_cred *cred; struct nfs4_state *state; - struct inode *inode; cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) @@ -1070,9 +1286,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } goto out_drop; } - inode = state->inode; - iput(inode); - if (inode == dentry->d_inode) { + if (state->inode == dentry->d_inode) { nfs4_intent_set_file(nd, dentry, state); return 1; } @@ -1508,11 +1722,13 @@ static int _nfs4_proc_write(struct nfs_write_data *wdata) wdata->args.bitmask = server->attr_bitmask; wdata->res.server = server; + wdata->timestamp = jiffies; nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, rpcflags); dprintk("NFS reply write: %d\n", status); if (status < 0) return status; + renew_lease(server, wdata->timestamp); nfs_post_op_update_inode(inode, fattr); return wdata->res.count; } @@ -1547,8 +1763,11 @@ static int _nfs4_proc_commit(struct nfs_write_data *cdata) cdata->args.bitmask = server->attr_bitmask; cdata->res.server = server; + cdata->timestamp = jiffies; nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, 0); + if (status >= 0) + renew_lease(server, cdata->timestamp); dprintk("NFS reply commit: %d\n", status); if (status >= 0) nfs_post_op_update_inode(inode, fattr); @@ -1601,7 +1820,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = PTR_ERR(state); goto out; } - d_instantiate(dentry, state->inode); + d_instantiate(dentry, igrab(state->inode)); if (flags & O_EXCL) { struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, @@ -2125,10 +2344,9 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static void -nfs4_read_done(struct rpc_task *task) +static void nfs4_read_done(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct nfs_read_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { @@ -2138,9 +2356,14 @@ nfs4_read_done(struct rpc_task *task) if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); /* Call back common NFS readpage processing */ - nfs_readpage_result(task); + nfs_readpage_result(task, calldata); } +static const struct rpc_call_ops nfs4_read_ops = { + .rpc_call_done = nfs4_read_done, + .rpc_release = nfs_readdata_release, +}; + static void nfs4_proc_read_setup(struct nfs_read_data *data) { @@ -2160,14 +2383,13 @@ nfs4_proc_read_setup(struct nfs_read_data *data) flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs4_write_done(struct rpc_task *task) +static void nfs4_write_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { @@ -2179,9 +2401,14 @@ nfs4_write_done(struct rpc_task *task) nfs_post_op_update_inode(inode, data->res.fattr); } /* Call back common NFS writeback processing */ - nfs_writeback_done(task); + nfs_writeback_done(task, calldata); } +static const struct rpc_call_ops nfs4_write_ops = { + .rpc_call_done = nfs4_write_done, + .rpc_release = nfs_writedata_release, +}; + static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) { @@ -2214,14 +2441,13 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs4_commit_done(struct rpc_task *task) +static void nfs4_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { @@ -2231,9 +2457,14 @@ nfs4_commit_done(struct rpc_task *task) if (task->tk_status >= 0) nfs_post_op_update_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ - nfs_commit_done(task); + nfs_commit_done(task, calldata); } +static const struct rpc_call_ops nfs4_commit_ops = { + .rpc_call_done = nfs4_commit_done, + .rpc_release = nfs_commit_release, +}; + static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { @@ -2255,7 +2486,7 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data); rpc_call_setup(task, &msg, 0); } @@ -2263,11 +2494,10 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how) * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. */ -static void -renew_done(struct rpc_task *task) +static void nfs4_renew_done(struct rpc_task *task, void *data) { struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; - unsigned long timestamp = (unsigned long)task->tk_calldata; + unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { switch (task->tk_status) { @@ -2284,26 +2514,28 @@ renew_done(struct rpc_task *task) spin_unlock(&clp->cl_lock); } -int -nfs4_proc_async_renew(struct nfs4_client *clp) +static const struct rpc_call_ops nfs4_renew_ops = { + .rpc_call_done = nfs4_renew_done, +}; + +int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], .rpc_argp = clp, - .rpc_cred = clp->cl_cred, + .rpc_cred = cred, }; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, - renew_done, (void *)jiffies); + &nfs4_renew_ops, (void *)jiffies); } -int -nfs4_proc_renew(struct nfs4_client *clp) +int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], .rpc_argp = clp, - .rpc_cred = clp->cl_cred, + .rpc_cred = cred, }; unsigned long now = jiffies; int status; @@ -2519,7 +2751,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) case -NFS4ERR_EXPIRED: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_OK, &clp->cl_state)) + if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) rpc_wake_up_task(task); task->tk_status = 0; return -EAGAIN; @@ -2536,25 +2768,25 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) return 0; } +static int nfs4_wait_bit_interruptible(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) { - DEFINE_WAIT(wait); sigset_t oldset; - int interruptible, res = 0; + int res; might_sleep(); rpc_clnt_sigmask(clnt, &oldset); - interruptible = TASK_UNINTERRUPTIBLE; - if (clnt->cl_intr) - interruptible = TASK_INTERRUPTIBLE; - prepare_to_wait(&clp->cl_waitq, &wait, interruptible); - nfs4_schedule_state_recovery(clp); - if (clnt->cl_intr && signalled()) - res = -ERESTARTSYS; - else if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) - schedule(); - finish_wait(&clp->cl_waitq, &wait); + res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER, + nfs4_wait_bit_interruptible, + TASK_INTERRUPTIBLE); rpc_clnt_sigunmask(clnt, &oldset); return res; } @@ -2597,6 +2829,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(clp); ret = nfs4_wait_clnt_recover(server->client, clp); if (ret == 0) exception->retry = 1; @@ -2613,7 +2846,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct return nfs4_map_errors(ret); } -int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port) +int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { @@ -2624,7 +2857,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], .rpc_argp = &setclientid, .rpc_resp = clp, - .rpc_cred = clp->cl_cred, + .rpc_cred = cred, }; u32 *p; int loop = 0; @@ -2638,7 +2871,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), - clp->cl_cred->cr_ops->cr_name, + cred->cr_ops->cr_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, sizeof(setclientid.sc_netid), "tcp"); @@ -2661,14 +2894,14 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p } int -nfs4_proc_setclientid_confirm(struct nfs4_client *clp) +nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], .rpc_argp = clp, .rpc_resp = &fsinfo, - .rpc_cred = clp->cl_cred, + .rpc_cred = cred, }; unsigned long now; int status; @@ -2679,24 +2912,92 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp) spin_lock(&clp->cl_lock); clp->cl_lease_time = fsinfo.lease_time * HZ; clp->cl_last_renewal = now; + clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); spin_unlock(&clp->cl_lock); } return status; } -static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +struct nfs4_delegreturndata { + struct nfs4_delegreturnargs args; + struct nfs4_delegreturnres res; + struct nfs_fh fh; + nfs4_stateid stateid; + struct rpc_cred *cred; + unsigned long timestamp; + struct nfs_fattr fattr; + int rpc_status; +}; + +static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata) { - struct nfs4_delegreturnargs args = { - .fhandle = NFS_FH(inode), - .stateid = stateid, - }; + struct nfs4_delegreturndata *data = calldata; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], - .rpc_argp = &args, - .rpc_cred = cred, + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, }; + nfs_fattr_init(data->res.fattr); + rpc_call_setup(task, &msg, 0); +} - return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); +static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegreturndata *data = calldata; + data->rpc_status = task->tk_status; + if (data->rpc_status == 0) + renew_lease(data->res.server, data->timestamp); +} + +static void nfs4_delegreturn_release(void *calldata) +{ + struct nfs4_delegreturndata *data = calldata; + + put_rpccred(data->cred); + kfree(calldata); +} + +const static struct rpc_call_ops nfs4_delegreturn_ops = { + .rpc_call_prepare = nfs4_delegreturn_prepare, + .rpc_call_done = nfs4_delegreturn_done, + .rpc_release = nfs4_delegreturn_release, +}; + +static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +{ + struct nfs4_delegreturndata *data; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_task *task; + int status; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + data->args.fhandle = &data->fh; + data->args.stateid = &data->stateid; + data->args.bitmask = server->attr_bitmask; + nfs_copy_fh(&data->fh, NFS_FH(inode)); + memcpy(&data->stateid, stateid, sizeof(data->stateid)); + data->res.fattr = &data->fattr; + data->res.server = server; + data->cred = get_rpccred(cred); + data->timestamp = jiffies; + data->rpc_status = 0; + + task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); + if (IS_ERR(task)) { + nfs4_delegreturn_release(data); + return PTR_ERR(task); + } + status = nfs4_wait_for_completion_rpc_task(task); + if (status == 0) { + status = data->rpc_status; + if (status == 0) + nfs_post_op_update_inode(inode, &data->fattr); + } + rpc_release_task(task); + return status; } int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) @@ -2734,43 +3035,17 @@ nfs4_set_lock_task_retry(unsigned long timeout) return timeout; } -static inline int -nfs4_lck_type(int cmd, struct file_lock *request) -{ - /* set lock type */ - switch (request->fl_type) { - case F_RDLCK: - return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; - case F_WRLCK: - return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT; - case F_UNLCK: - return NFS4_WRITE_LT; - } - BUG(); - return 0; -} - -static inline uint64_t -nfs4_lck_length(struct file_lock *request) -{ - if (request->fl_end == OFFSET_MAX) - return ~(uint64_t)0; - return request->fl_end - request->fl_start + 1; -} - static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_client *clp = server->nfs4_state; - struct nfs_lockargs arg = { + struct nfs_lockt_args arg = { .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), + .fl = request, }; - struct nfs_lockres res = { - .server = server, + struct nfs_lockt_res res = { + .denied = request, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], @@ -2778,36 +3053,23 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; - struct nfs_lowner nlo; struct nfs4_lock_state *lsp; int status; down_read(&clp->cl_sem); - nlo.clientid = clp->cl_clientid; + arg.lock_owner.clientid = clp->cl_clientid; status = nfs4_set_lock_state(state, request); if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - nlo.id = lsp->ls_id; - arg.u.lockt = &nlo; + arg.lock_owner.id = lsp->ls_id; status = rpc_call_sync(server->client, &msg, 0); - if (!status) { - request->fl_type = F_UNLCK; - } else if (status == -NFS4ERR_DENIED) { - int64_t len, start, end; - start = res.u.denied.offset; - len = res.u.denied.length; - end = start + len - 1; - if (end < 0 || len == 0) - request->fl_end = OFFSET_MAX; - else - request->fl_end = (loff_t)end; - request->fl_start = (loff_t)start; - request->fl_type = F_WRLCK; - if (res.u.denied.type & 1) - request->fl_type = F_RDLCK; - request->fl_pid = 0; - status = 0; + switch (status) { + case 0: + request->fl_type = F_UNLCK; + break; + case -NFS4ERR_DENIED: + status = 0; } out: up_read(&clp->cl_sem); @@ -2847,196 +3109,314 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) } struct nfs4_unlockdata { - struct nfs_lockargs arg; - struct nfs_locku_opargs luargs; - struct nfs_lockres res; + struct nfs_locku_args arg; + struct nfs_locku_res res; struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; - atomic_t refcount; - struct completion completion; + struct file_lock fl; + const struct nfs_server *server; + unsigned long timestamp; }; -static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) -{ - if (atomic_dec_and_test(&calldata->refcount)) { - nfs_free_seqid(calldata->luargs.seqid); - nfs4_put_lock_state(calldata->lsp); - put_nfs_open_context(calldata->ctx); - kfree(calldata); - } +static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, + struct nfs_open_context *ctx, + struct nfs4_lock_state *lsp, + struct nfs_seqid *seqid) +{ + struct nfs4_unlockdata *p; + struct inode *inode = lsp->ls_state->inode; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + p->arg.fh = NFS_FH(inode); + p->arg.fl = &p->fl; + p->arg.seqid = seqid; + p->arg.stateid = &lsp->ls_stateid; + p->lsp = lsp; + atomic_inc(&lsp->ls_count); + /* Ensure we don't close file until we're done freeing locks! */ + p->ctx = get_nfs_open_context(ctx); + memcpy(&p->fl, fl, sizeof(p->fl)); + p->server = NFS_SERVER(inode); + return p; } -static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) +static void nfs4_locku_release_calldata(void *data) { - complete(&calldata->completion); - nfs4_locku_release_calldata(calldata); + struct nfs4_unlockdata *calldata = data; + nfs_free_seqid(calldata->arg.seqid); + nfs4_put_lock_state(calldata->lsp); + put_nfs_open_context(calldata->ctx); + kfree(calldata); } -static void nfs4_locku_done(struct rpc_task *task) +static void nfs4_locku_done(struct rpc_task *task, void *data) { - struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + struct nfs4_unlockdata *calldata = data; - nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); + if (RPC_ASSASSINATED(task)) + return; + nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(calldata->lsp->ls_stateid.data, - calldata->res.u.stateid.data, + calldata->res.stateid.data, sizeof(calldata->lsp->ls_stateid.data)); + renew_lease(calldata->server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); + nfs4_schedule_state_recovery(calldata->server->nfs4_state); break; default: - if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { + if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { rpc_restart_call(task); - return; } } - nfs4_locku_complete(calldata); } -static void nfs4_locku_begin(struct rpc_task *task) +static void nfs4_locku_prepare(struct rpc_task *task, void *data) { - struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + struct nfs4_unlockdata *calldata = data; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], .rpc_argp = &calldata->arg, .rpc_resp = &calldata->res, .rpc_cred = calldata->lsp->ls_state->owner->so_cred, }; - int status; - status = nfs_wait_on_sequence(calldata->luargs.seqid, task); - if (status != 0) + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { - nfs4_locku_complete(calldata); - task->tk_exit = NULL; - rpc_exit(task, 0); + /* Note: exit _without_ running nfs4_locku_done */ + task->tk_action = NULL; return; } + calldata->timestamp = jiffies; rpc_call_setup(task, &msg, 0); } +static const struct rpc_call_ops nfs4_locku_ops = { + .rpc_call_prepare = nfs4_locku_prepare, + .rpc_call_done = nfs4_locku_done, + .rpc_release = nfs4_locku_release_calldata, +}; + +static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, + struct nfs_open_context *ctx, + struct nfs4_lock_state *lsp, + struct nfs_seqid *seqid) +{ + struct nfs4_unlockdata *data; + struct rpc_task *task; + + data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); + if (data == NULL) { + nfs_free_seqid(seqid); + return ERR_PTR(-ENOMEM); + } + + /* Unlock _before_ we do the RPC call */ + do_vfs_lock(fl->fl_file, fl); + task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); + if (IS_ERR(task)) + nfs4_locku_release_calldata(data); + return task; +} + static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_unlockdata *calldata; - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; - int status; + struct rpc_task *task; + int status = 0; /* Is this a delegated lock? */ if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - return do_vfs_lock(request->fl_file, request); + goto out_unlock; + /* Is this open_owner holding any locks on the server? */ + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + goto out_unlock; status = nfs4_set_lock_state(state, request); if (status != 0) - return status; + goto out_unlock; lsp = request->fl_u.nfs4_fl.owner; - /* We might have lost the locks! */ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) - return 0; - calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); - if (calldata == NULL) - return -ENOMEM; - calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (calldata->luargs.seqid == NULL) { - kfree(calldata); - return -ENOMEM; - } - calldata->luargs.stateid = &lsp->ls_stateid; - calldata->arg.fh = NFS_FH(inode); - calldata->arg.type = nfs4_lck_type(cmd, request); - calldata->arg.offset = request->fl_start; - calldata->arg.length = nfs4_lck_length(request); - calldata->arg.u.locku = &calldata->luargs; - calldata->res.server = server; - calldata->lsp = lsp; - atomic_inc(&lsp->ls_count); - - /* Ensure we don't close file until we're done freeing locks! */ - calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); - - atomic_set(&calldata->refcount, 2); - init_completion(&calldata->completion); - - status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, - nfs4_locku_done, calldata); - if (status == 0) - wait_for_completion_interruptible(&calldata->completion); + status = -ENOMEM; + seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (seqid == NULL) + goto out_unlock; + task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); + status = PTR_ERR(task); + if (IS_ERR(task)) + goto out_unlock; + status = nfs4_wait_for_completion_rpc_task(task); + rpc_release_task(task); + return status; +out_unlock: do_vfs_lock(request->fl_file, request); - nfs4_locku_release_calldata(calldata); return status; } -static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) +struct nfs4_lockdata { + struct nfs_lock_args arg; + struct nfs_lock_res res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + struct file_lock fl; + unsigned long timestamp; + int rpc_status; + int cancelled; +}; + +static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, + struct nfs_open_context *ctx, struct nfs4_lock_state *lsp) { - struct inode *inode = state->inode; + struct nfs4_lockdata *p; + struct inode *inode = lsp->ls_state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; - struct nfs_lock_opargs largs = { - .lock_stateid = &lsp->ls_stateid, - .open_stateid = &state->stateid, - .lock_owner = { - .clientid = server->nfs4_state->cl_clientid, - .id = lsp->ls_id, - }, - .reclaim = reclaim, - }; - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - .u = { - .lock = &largs, - }, - }; - struct nfs_lockres res = { - .server = server, - }; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + + p->arg.fh = NFS_FH(inode); + p->arg.fl = &p->fl; + p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (p->arg.lock_seqid == NULL) + goto out_free; + p->arg.lock_stateid = &lsp->ls_stateid; + p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; + p->arg.lock_owner.id = lsp->ls_id; + p->lsp = lsp; + atomic_inc(&lsp->ls_count); + p->ctx = get_nfs_open_context(ctx); + memcpy(&p->fl, fl, sizeof(p->fl)); + return p; +out_free: + kfree(p); + return NULL; +} + +static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_lockdata *data = calldata; + struct nfs4_state *state = data->lsp->ls_state; + struct nfs4_state_owner *sp = state->owner; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = state->owner->so_cred, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &data->arg, + .rpc_resp = &data->res, + .rpc_cred = sp->so_cred, }; - int status = -ENOMEM; - - largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (largs.lock_seqid == NULL) - return -ENOMEM; - if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { - struct nfs4_state_owner *owner = state->owner; - largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); - if (largs.open_seqid == NULL) + if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) + return; + dprintk("%s: begin!\n", __FUNCTION__); + /* Do we need to do an open_to_lock_owner? */ + if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { + data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid); + if (data->arg.open_seqid == NULL) { + data->rpc_status = -ENOMEM; + task->tk_action = NULL; goto out; - largs.new_lock_owner = 1; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment open seqid on success, and seqid mutating errors */ - if (largs.new_lock_owner != 0) { - nfs_increment_open_seqid(status, largs.open_seqid); - if (status == 0) - nfs_confirm_seqid(&lsp->ls_seqid, 0); } - nfs_free_seqid(largs.open_seqid); - } else - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment lock seqid on success, and seqid mutating errors*/ - nfs_increment_lock_seqid(status, largs.lock_seqid); - /* save the returned stateid. */ - if (status == 0) { - memcpy(lsp->ls_stateid.data, res.u.stateid.data, - sizeof(lsp->ls_stateid.data)); - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - } else if (status == -NFS4ERR_DENIED) - status = -EAGAIN; + data->arg.open_stateid = &state->stateid; + data->arg.new_lock_owner = 1; + } + data->timestamp = jiffies; + rpc_call_setup(task, &msg, 0); out: - nfs_free_seqid(largs.lock_seqid); - return status; + dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status); +} + +static void nfs4_lock_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_lockdata *data = calldata; + + dprintk("%s: begin!\n", __FUNCTION__); + + data->rpc_status = task->tk_status; + if (RPC_ASSASSINATED(task)) + goto out; + if (data->arg.new_lock_owner != 0) { + nfs_increment_open_seqid(data->rpc_status, data->arg.open_seqid); + if (data->rpc_status == 0) + nfs_confirm_seqid(&data->lsp->ls_seqid, 0); + else + goto out; + } + if (data->rpc_status == 0) { + memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, + sizeof(data->lsp->ls_stateid.data)); + data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; + renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + } + nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); +out: + dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status); +} + +static void nfs4_lock_release(void *calldata) +{ + struct nfs4_lockdata *data = calldata; + + dprintk("%s: begin!\n", __FUNCTION__); + if (data->arg.open_seqid != NULL) + nfs_free_seqid(data->arg.open_seqid); + if (data->cancelled != 0) { + struct rpc_task *task; + task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, + data->arg.lock_seqid); + if (!IS_ERR(task)) + rpc_release_task(task); + dprintk("%s: cancelling lock!\n", __FUNCTION__); + } else + nfs_free_seqid(data->arg.lock_seqid); + nfs4_put_lock_state(data->lsp); + put_nfs_open_context(data->ctx); + kfree(data); + dprintk("%s: done!\n", __FUNCTION__); +} + +static const struct rpc_call_ops nfs4_lock_ops = { + .rpc_call_prepare = nfs4_lock_prepare, + .rpc_call_done = nfs4_lock_done, + .rpc_release = nfs4_lock_release, +}; + +static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) +{ + struct nfs4_lockdata *data; + struct rpc_task *task; + int ret; + + dprintk("%s: begin!\n", __FUNCTION__); + data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, + fl->fl_u.nfs4_fl.owner); + if (data == NULL) + return -ENOMEM; + if (IS_SETLKW(cmd)) + data->arg.block = 1; + if (reclaim != 0) + data->arg.reclaim = 1; + task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, + &nfs4_lock_ops, data); + if (IS_ERR(task)) { + nfs4_lock_release(data); + return PTR_ERR(task); + } + ret = nfs4_wait_for_completion_rpc_task(task); + if (ret == 0) { + ret = data->rpc_status; + if (ret == -NFS4ERR_DENIED) + ret = -EAGAIN; + } else + data->cancelled = 1; + rpc_release_task(task); + dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret); + return ret; } static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index a3001628ad32..5d764d8e6d8a 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -54,6 +54,7 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include "nfs4_fs.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -61,6 +62,7 @@ void nfs4_renew_state(void *data) { struct nfs4_client *clp = (struct nfs4_client *)data; + struct rpc_cred *cred; long lease, timeout; unsigned long last, now; @@ -68,7 +70,7 @@ nfs4_renew_state(void *data) dprintk("%s: start\n", __FUNCTION__); /* Are there any active superblocks? */ if (list_empty(&clp->cl_superblocks)) - goto out; + goto out; spin_lock(&clp->cl_lock); lease = clp->cl_lease_time; last = clp->cl_last_renewal; @@ -76,9 +78,17 @@ nfs4_renew_state(void *data) timeout = (2 * lease) / 3 + (long)last - (long)now; /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { + cred = nfs4_get_renew_cred(clp); + if (cred == NULL) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + spin_unlock(&clp->cl_lock); + nfs_expire_all_delegations(clp); + goto out; + } spin_unlock(&clp->cl_lock); /* Queue an asynchronous RENEW. */ - nfs4_proc_async_renew(clp); + nfs4_proc_async_renew(clp, cred); + put_rpccred(cred); timeout = (2 * lease) / 3; spin_lock(&clp->cl_lock); } else diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5ef4c57618fe..afad0255e7db 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -43,6 +43,8 @@ #include <linux/smp_lock.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include <linux/kthread.h> +#include <linux/module.h> #include <linux/workqueue.h> #include <linux/bitops.h> @@ -57,8 +59,6 @@ const nfs4_stateid zero_stateid; static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); -static void nfs4_recover_state(void *); - void init_nfsv4_state(struct nfs_server *server) { @@ -91,11 +91,10 @@ nfs4_alloc_client(struct in_addr *addr) if (nfs_callback_up() < 0) return NULL; - if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { nfs_callback_down(); return NULL; } - memset(clp, 0, sizeof(*clp)); memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); @@ -103,14 +102,12 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); INIT_LIST_HEAD(&clp->cl_superblocks); - init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_boot_time = CURRENT_TIME; - clp->cl_state = 1 << NFS4CLNT_OK; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; return clp; } @@ -127,8 +124,6 @@ nfs4_free_client(struct nfs4_client *clp) kfree(sp); } BUG_ON(!list_empty(&clp->cl_state_owners)); - if (clp->cl_cred) - put_rpccred(clp->cl_cred); nfs_idmap_delete(clp); if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); @@ -193,27 +188,22 @@ nfs4_put_client(struct nfs4_client *clp) list_del(&clp->cl_servers); spin_unlock(&state_spinlock); BUG_ON(!list_empty(&clp->cl_superblocks)); - wake_up_all(&clp->cl_waitq); rpc_wake_up(&clp->cl_rpcwaitq); nfs4_kill_renewd(clp); nfs4_free_client(clp); } -static int __nfs4_init_client(struct nfs4_client *clp) +static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) { - int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport); + int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, + nfs_callback_tcpport, cred); if (status == 0) - status = nfs4_proc_setclientid_confirm(clp); + status = nfs4_proc_setclientid_confirm(clp, cred); if (status == 0) nfs4_schedule_state_renewal(clp); return status; } -int nfs4_init_client(struct nfs4_client *clp) -{ - return nfs4_map_errors(__nfs4_init_client(clp)); -} - u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { @@ -235,6 +225,32 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) return sp; } +struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct rpc_cred *cred = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (list_empty(&sp->so_states)) + continue; + cred = get_rpccred(sp->so_cred); + break; + } + return cred; +} + +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + + if (!list_empty(&clp->cl_state_owners)) { + sp = list_entry(clp->cl_state_owners.next, + struct nfs4_state_owner, so_list); + return get_rpccred(sp->so_cred); + } + return NULL; +} + static struct nfs4_state_owner * nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) { @@ -349,14 +365,9 @@ nfs4_alloc_open_state(void) { struct nfs4_state *state; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; - state->state = 0; - state->nreaders = 0; - state->nwriters = 0; - state->flags = 0; - memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); @@ -475,15 +486,23 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) /* Protect against nfs4_find_state() */ spin_lock(&owner->so_lock); spin_lock(&inode->i_lock); - if (mode & FMODE_READ) - state->nreaders--; - if (mode & FMODE_WRITE) - state->nwriters--; + switch (mode & (FMODE_READ | FMODE_WRITE)) { + case FMODE_READ: + state->n_rdonly--; + break; + case FMODE_WRITE: + state->n_wronly--; + break; + case FMODE_READ|FMODE_WRITE: + state->n_rdwr--; + } oldstate = newstate = state->state; - if (state->nreaders == 0) - newstate &= ~FMODE_READ; - if (state->nwriters == 0) - newstate &= ~FMODE_WRITE; + if (state->n_rdwr == 0) { + if (state->n_rdonly == 0) + newstate &= ~FMODE_READ; + if (state->n_wronly == 0) + newstate &= ~FMODE_WRITE; + } if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { nfs4_state_set_mode_locked(state, newstate); oldstate = newstate; @@ -733,45 +752,43 @@ out: } static int reclaimer(void *); -struct reclaimer_args { - struct nfs4_client *clp; - struct completion complete; -}; + +static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) +{ + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER); + rpc_wake_up(&clp->cl_rpcwaitq); +} /* * State recovery routine */ -void -nfs4_recover_state(void *data) +static void nfs4_recover_state(struct nfs4_client *clp) { - struct nfs4_client *clp = (struct nfs4_client *)data; - struct reclaimer_args args = { - .clp = clp, - }; - might_sleep(); - - init_completion(&args.complete); + struct task_struct *task; - if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) - goto out_failed_clear; - wait_for_completion(&args.complete); - return; -out_failed_clear: - set_bit(NFS4CLNT_OK, &clp->cl_state); - wake_up_all(&clp->cl_waitq); - rpc_wake_up(&clp->cl_rpcwaitq); + __module_get(THIS_MODULE); + atomic_inc(&clp->cl_count); + task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", + NIPQUAD(clp->cl_addr)); + if (!IS_ERR(task)) + return; + nfs4_clear_recover_bit(clp); + nfs4_put_client(clp); + module_put(THIS_MODULE); } /* * Schedule a state recovery attempt */ -void -nfs4_schedule_state_recovery(struct nfs4_client *clp) +void nfs4_schedule_state_recovery(struct nfs4_client *clp) { if (!clp) return; - if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state)) - schedule_work(&clp->cl_recoverd); + if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) + nfs4_recover_state(clp); } static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) @@ -887,18 +904,14 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp) static int reclaimer(void *ptr) { - struct reclaimer_args *args = (struct reclaimer_args *)ptr; - struct nfs4_client *clp = args->clp; + struct nfs4_client *clp = ptr; struct nfs4_state_owner *sp; struct nfs4_state_recovery_ops *ops; + struct rpc_cred *cred; int status = 0; - daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); allow_signal(SIGKILL); - atomic_inc(&clp->cl_count); - complete(&args->complete); - /* Ensure exclusive access to NFSv4 state */ lock_kernel(); down_write(&clp->cl_sem); @@ -906,20 +919,33 @@ static int reclaimer(void *ptr) if (list_empty(&clp->cl_superblocks)) goto out; restart_loop: - status = nfs4_proc_renew(clp); - switch (status) { - case 0: - case -NFS4ERR_CB_PATH_DOWN: - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_LEASE_MOVED: - ops = &nfs4_reboot_recovery_ops; - break; - default: - ops = &nfs4_network_partition_recovery_ops; - }; + ops = &nfs4_network_partition_recovery_ops; + /* Are there any open files on this volume? */ + cred = nfs4_get_renew_cred(clp); + if (cred != NULL) { + /* Yes there are: try to renew the old lease */ + status = nfs4_proc_renew(clp, cred); + switch (status) { + case 0: + case -NFS4ERR_CB_PATH_DOWN: + put_rpccred(cred); + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + ops = &nfs4_reboot_recovery_ops; + } + } else { + /* "reboot" to ensure we clear all state on the server */ + clp->cl_boot_time = CURRENT_TIME; + cred = nfs4_get_setclientid_cred(clp); + } + /* We're going to have to re-establish a clientid */ nfs4_state_mark_reclaim(clp); - status = __nfs4_init_client(clp); + status = -ENOENT; + if (cred != NULL) { + status = nfs4_init_client(clp, cred); + put_rpccred(cred); + } if (status) goto out_error; /* Mark all delegations for reclaim */ @@ -940,14 +966,13 @@ restart_loop: } nfs_delegation_reap_unclaimed(clp); out: - set_bit(NFS4CLNT_OK, &clp->cl_state); up_write(&clp->cl_sem); unlock_kernel(); - wake_up_all(&clp->cl_waitq); - rpc_wake_up(&clp->cl_rpcwaitq); if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); + nfs4_clear_recover_bit(clp); nfs4_put_client(clp); + module_put_and_exit(0); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fbbace8a30c4..4bbf5ef57785 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -392,9 +392,11 @@ static int nfs_stat_to_errno(int); decode_getattr_maxsz) #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_delegreturn_maxsz) + encode_delegreturn_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ - decode_delegreturn_maxsz) + decode_delegreturn_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -564,7 +566,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s } if (iap->ia_valid & ATTR_MODE) { bmval1 |= FATTR4_WORD1_MODE; - WRITE32(iap->ia_mode); + WRITE32(iap->ia_mode & S_IALLUGO); } if (iap->ia_valid & ATTR_UID) { bmval1 |= FATTR4_WORD1_OWNER; @@ -742,69 +744,80 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name) return 0; } +static inline int nfs4_lock_type(struct file_lock *fl, int block) +{ + if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK) + return block ? NFS4_READW_LT : NFS4_READ_LT; + return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT; +} + +static inline uint64_t nfs4_lock_length(struct file_lock *fl) +{ + if (fl->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return fl->fl_end - fl->fl_start + 1; +} + /* * opcode,type,reclaim,offset,length,new_lock_owner = 32 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 */ -static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) { uint32_t *p; - struct nfs_lock_opargs *opargs = arg->u.lock; RESERVE_SPACE(32); WRITE32(OP_LOCK); - WRITE32(arg->type); - WRITE32(opargs->reclaim); - WRITE64(arg->offset); - WRITE64(arg->length); - WRITE32(opargs->new_lock_owner); - if (opargs->new_lock_owner){ + WRITE32(nfs4_lock_type(args->fl, args->block)); + WRITE32(args->reclaim); + WRITE64(args->fl->fl_start); + WRITE64(nfs4_lock_length(args->fl)); + WRITE32(args->new_lock_owner); + if (args->new_lock_owner){ RESERVE_SPACE(40); - WRITE32(opargs->open_seqid->sequence->counter); - WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data)); - WRITE32(opargs->lock_seqid->sequence->counter); - WRITE64(opargs->lock_owner.clientid); + WRITE32(args->open_seqid->sequence->counter); + WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data)); + WRITE32(args->lock_seqid->sequence->counter); + WRITE64(args->lock_owner.clientid); WRITE32(4); - WRITE32(opargs->lock_owner.id); + WRITE32(args->lock_owner.id); } else { RESERVE_SPACE(20); - WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data)); - WRITE32(opargs->lock_seqid->sequence->counter); + WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data)); + WRITE32(args->lock_seqid->sequence->counter); } return 0; } -static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args) { uint32_t *p; - struct nfs_lowner *opargs = arg->u.lockt; RESERVE_SPACE(40); WRITE32(OP_LOCKT); - WRITE32(arg->type); - WRITE64(arg->offset); - WRITE64(arg->length); - WRITE64(opargs->clientid); + WRITE32(nfs4_lock_type(args->fl, 0)); + WRITE64(args->fl->fl_start); + WRITE64(nfs4_lock_length(args->fl)); + WRITE64(args->lock_owner.clientid); WRITE32(4); - WRITE32(opargs->id); + WRITE32(args->lock_owner.id); return 0; } -static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args) { uint32_t *p; - struct nfs_locku_opargs *opargs = arg->u.locku; RESERVE_SPACE(44); WRITE32(OP_LOCKU); - WRITE32(arg->type); - WRITE32(opargs->seqid->sequence->counter); - WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data)); - WRITE64(arg->offset); - WRITE64(arg->length); + WRITE32(nfs4_lock_type(args->fl, 0)); + WRITE32(args->seqid->sequence->counter); + WRITEMEM(args->stateid->data, sizeof(args->stateid->data)); + WRITE64(args->fl->fl_start); + WRITE64(nfs4_lock_length(args->fl)); return 0; } @@ -964,9 +977,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_OPEN_CONFIRM); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); WRITE32(arg->seqid->sequence->counter); return 0; @@ -1499,9 +1512,6 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1538,9 +1548,6 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1558,19 +1565,19 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); if (status) goto out; status = encode_open(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -1602,21 +1609,14 @@ out: /* * Encode a LOCK request */ -static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lock_args *args) { struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, }; - struct nfs_lock_opargs *opargs = args->u.lock; int status; - status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task); - if (status != 0) - goto out; - /* Do we need to do an open_to_lock_owner? */ - if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) - opargs->new_lock_owner = 0; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1630,7 +1630,7 @@ out: /* * Encode a LOCKT request */ -static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockt_args *args) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1651,7 +1651,7 @@ out: /* * Encode a LOCKU request */ -static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_locku_args *args) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1985,14 +1985,20 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const str { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fhandle)) == 0) - status = encode_delegreturn(&xdr, args->stateid); + status = encode_putfh(&xdr, args->fhandle); + if (status != 0) + goto out; + status = encode_delegreturn(&xdr, args->stateid); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -2955,55 +2961,64 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) /* * We create the owner, so we know a proper owner.id length is 4. */ -static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) { + uint64_t offset, length, clientid; uint32_t *p; - uint32_t namelen; + uint32_t namelen, type; READ_BUF(32); - READ64(denied->offset); - READ64(denied->length); - READ32(denied->type); - READ64(denied->owner.clientid); + READ64(offset); + READ64(length); + READ32(type); + if (fl != NULL) { + fl->fl_start = (loff_t)offset; + fl->fl_end = fl->fl_start + (loff_t)length - 1; + if (length == ~(uint64_t)0) + fl->fl_end = OFFSET_MAX; + fl->fl_type = F_WRLCK; + if (type & 1) + fl->fl_type = F_RDLCK; + fl->fl_pid = 0; + } + READ64(clientid); READ32(namelen); READ_BUF(namelen); - if (namelen == 4) - READ32(denied->owner.id); return -NFS4ERR_DENIED; } -static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) { uint32_t *p; int status; status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(res->u.stateid.data)); - COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); } else if (status == -NFS4ERR_DENIED) - return decode_lock_denied(xdr, &res->u.denied); + return decode_lock_denied(xdr, NULL); return status; } -static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res) { int status; status = decode_op_hdr(xdr, OP_LOCKT); if (status == -NFS4ERR_DENIED) - return decode_lock_denied(xdr, &res->u.denied); + return decode_lock_denied(xdr, res->denied); return status; } -static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) { uint32_t *p; int status; status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - READ_BUF(sizeof(res->u.stateid.data)); - COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); } return status; } @@ -3831,6 +3846,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct if (status) goto out; status = decode_open(&xdr, res); + if (status) + goto out; + decode_getfattr(&xdr, res->f_attr, res->server); out: return status; } @@ -3864,7 +3882,7 @@ out: /* * Decode LOCK response */ -static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lock_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3885,7 +3903,7 @@ out: /* * Decode LOCKT response */ -static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockt_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3906,7 +3924,7 @@ out: /* * Decode LOCKU response */ -static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_locku_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4174,7 +4192,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s /* * DELEGRETURN request */ -static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_delegreturnres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4182,11 +4200,14 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *d xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); - if (status == 0) { - status = decode_putfh(&xdr); - if (status == 0) - status = decode_delegreturn(&xdr); - } + if (status != 0) + goto out; + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_delegreturn(&xdr); + decode_getfattr(&xdr, res->fattr, res->server); +out: return status; } diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 1b272a135a31..e897e00c2c9d 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf) case Opt_noacl: nfs_data.flags |= NFS_MOUNT_NOACL; break; - default : + default: + printk(KERN_WARNING "Root-NFS: unknown " + "option: %s\n", p); return 0; } } @@ -296,8 +298,8 @@ static int __init root_nfs_name(char *name) nfs_port = -1; nfs_data.version = NFS_MOUNT_VERSION; nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ - nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; + nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; nfs_data.acregmin = 3; nfs_data.acregmax = 60; nfs_data.acdirmin = 30; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e1e3ca5d746b..f5150d71c03d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -111,6 +111,9 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, }; int status; + /* Mask out the non-modebit related stuff from attr->ia_mode */ + sattr->ia_mode &= S_IALLUGO; + dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); @@ -547,10 +550,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); -static void -nfs_read_done(struct rpc_task *task) +static void nfs_read_done(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct nfs_read_data *data = calldata; if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); @@ -560,9 +562,14 @@ nfs_read_done(struct rpc_task *task) if (data->args.offset + data->args.count >= data->res.fattr->size) data->res.eof = 1; } - nfs_readpage_result(task); + nfs_readpage_result(task, calldata); } +static const struct rpc_call_ops nfs_read_ops = { + .rpc_call_done = nfs_read_done, + .rpc_release = nfs_readdata_release, +}; + static void nfs_proc_read_setup(struct nfs_read_data *data) { @@ -580,20 +587,24 @@ nfs_proc_read_setup(struct nfs_read_data *data) flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs_write_done(struct rpc_task *task) +static void nfs_write_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task); + nfs_writeback_done(task, calldata); } +static const struct rpc_call_ops nfs_write_ops = { + .rpc_call_done = nfs_write_done, + .rpc_release = nfs_writedata_release, +}; + static void nfs_proc_write_setup(struct nfs_write_data *data, int how) { @@ -614,7 +625,7 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data); rpc_call_setup(task, &msg, 0); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 5f20eafba8ec..05eb43fadf8e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -42,9 +42,8 @@ mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -void nfs_readdata_release(struct rpc_task *task) +void nfs_readdata_release(void *data) { - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; nfs_readdata_free(data); } @@ -84,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, int result; struct nfs_read_data *rdata; - rdata = nfs_readdata_alloc(); + rdata = nfs_readdata_alloc(1); if (!rdata) return -ENOMEM; @@ -220,9 +219,6 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long)inode; - data->task.tk_calldata = data; - /* Release requests */ - data->task.tk_release = nfs_readdata_release; dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, @@ -287,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) nbytes = req->wb_bytes; for(;;) { - data = nfs_readdata_alloc(); + data = nfs_readdata_alloc(1); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); @@ -343,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(); + data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); if (!data) goto out_bad; @@ -452,9 +448,9 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status) * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -void nfs_readpage_result(struct rpc_task *task) +void nfs_readpage_result(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_read_data *data = calldata; struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; int status = task->tk_status; diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c new file mode 100644 index 000000000000..4c486eb867ca --- /dev/null +++ b/fs/nfs/sysctl.c @@ -0,0 +1,84 @@ +/* + * linux/fs/nfs/sysctl.c + * + * Sysctl interface to NFS parameters + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/linkage.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/sysctl.h> +#include <linux/module.h> +#include <linux/nfs4.h> +#include <linux/nfs_idmap.h> + +#include "callback.h" + +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static struct ctl_table_header *nfs_callback_sysctl_table; +/* + * Something that isn't CTL_ANY, CTL_NONE or a value that may clash. + * Use the same values as fs/lockd/svc.c + */ +#define CTL_UNNUMBERED -2 + +static ctl_table nfs_cb_sysctls[] = { +#ifdef CONFIG_NFS_V4 + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_callback_tcpport", + .data = &nfs_callback_set_tcpport, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = (int *)&nfs_set_port_min, + .extra2 = (int *)&nfs_set_port_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "idmap_cache_timeout", + .data = &nfs_idmap_cache_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, +#endif + { .ctl_name = 0 } +}; + +static ctl_table nfs_cb_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs", + .mode = 0555, + .child = nfs_cb_sysctls, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_cb_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = nfs_cb_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +int nfs_register_sysctl(void) +{ + nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root, 0); + if (nfs_callback_sysctl_table == NULL) + return -ENOMEM; + return 0; +} + +void nfs_unregister_sysctl(void) +{ + unregister_sysctl_table(nfs_callback_sysctl_table); + nfs_callback_sysctl_table = NULL; +} diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index d639d172d568..a65c7b53d558 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -87,10 +87,9 @@ nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) * We delay initializing RPC info until after the call to dentry_iput() * in order to minimize races against rename(). */ -static void -nfs_async_unlink_init(struct rpc_task *task) +static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) { - struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct nfs_unlinkdata *data = calldata; struct dentry *dir = data->dir; struct rpc_message msg = { .rpc_cred = data->cred, @@ -116,10 +115,9 @@ nfs_async_unlink_init(struct rpc_task *task) * * Do the directory attribute update. */ -static void -nfs_async_unlink_done(struct rpc_task *task) +static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { - struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct nfs_unlinkdata *data = calldata; struct dentry *dir = data->dir; struct inode *dir_i; @@ -141,13 +139,18 @@ nfs_async_unlink_done(struct rpc_task *task) * We need to call nfs_put_unlinkdata as a 'tk_release' task since the * rpc_task would be freed too. */ -static void -nfs_async_unlink_release(struct rpc_task *task) +static void nfs_async_unlink_release(void *calldata) { - struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct nfs_unlinkdata *data = calldata; nfs_put_unlinkdata(data); } +static const struct rpc_call_ops nfs_unlink_ops = { + .rpc_call_prepare = nfs_async_unlink_init, + .rpc_call_done = nfs_async_unlink_done, + .rpc_release = nfs_async_unlink_release, +}; + /** * nfs_async_unlink - asynchronous unlinking of a file * @dentry: dentry to unlink @@ -157,7 +160,6 @@ nfs_async_unlink(struct dentry *dentry) { struct dentry *dir = dentry->d_parent; struct nfs_unlinkdata *data; - struct rpc_task *task; struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); int status = -ENOMEM; @@ -178,17 +180,13 @@ nfs_async_unlink(struct dentry *dentry) nfs_deletes = data; data->count = 1; - task = &data->task; - rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC); - task->tk_calldata = data; - task->tk_action = nfs_async_unlink_init; - task->tk_release = nfs_async_unlink_release; + rpc_init_task(&data->task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data); spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); - rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL); + rpc_sleep_on(&nfs_delete_queue, &data->task, NULL, NULL); status = 0; out: return status; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3107908e5f3f..9449b6835509 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -89,24 +89,38 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(void) +static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_commit_mempool); + p = NULL; + } + } } return p; } static inline void nfs_commit_free(struct nfs_write_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } -static void nfs_writedata_release(struct rpc_task *task) +void nfs_writedata_release(void *wdata) { - struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_writedata_free(wdata); } @@ -168,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, int result, written = 0; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(); + wdata = nfs_writedata_alloc(1); if (!wdata) return -ENOMEM; @@ -232,19 +246,16 @@ static int nfs_writepage_async(struct nfs_open_context *ctx, unsigned int offset, unsigned int count) { struct nfs_page *req; - int status; req = nfs_update_request(ctx, inode, page, offset, count); - status = (IS_ERR(req)) ? PTR_ERR(req) : 0; - if (status < 0) - goto out; + if (IS_ERR(req)) + return PTR_ERR(req); /* Update file length */ nfs_grow_file(page, offset, count); /* Set the PG_uptodate flag? */ nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); - out: - return status; + return 0; } static int wb_priority(struct writeback_control *wbc) @@ -304,11 +315,8 @@ do_it: lock_kernel(); if (!IS_SYNC(inode) && inode_referenced) { err = nfs_writepage_async(ctx, inode, page, 0, offset); - if (err >= 0) { - err = 0; - if (wbc->for_reclaim) - nfs_flush_inode(inode, 0, 0, FLUSH_STABLE); - } + if (!wbc->for_writepages) + nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); } else { err = nfs_writepage_sync(ctx, inode, page, 0, offset, priority); @@ -877,9 +885,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->task.tk_priority = flush_task_priority(how); data->task.tk_cookie = (unsigned long)inode; - data->task.tk_calldata = data; - /* Release requests */ - data->task.tk_release = nfs_writedata_release; dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, @@ -919,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) nbytes = req->wb_bytes; for (;;) { - data = nfs_writedata_alloc(); + data = nfs_writedata_alloc(1); if (!data) goto out_bad; list_add(&data->pages, &list); @@ -983,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(); + data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1137,9 +1142,9 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task) +void nfs_writeback_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; @@ -1206,9 +1211,8 @@ void nfs_writeback_done(struct rpc_task *task) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -static void nfs_commit_release(struct rpc_task *task) +void nfs_commit_release(void *wdata) { - struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_commit_free(wdata); } @@ -1244,9 +1248,6 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->task.tk_priority = flush_task_priority(how); data->task.tk_cookie = (unsigned long)inode; - data->task.tk_calldata = data; - /* Release requests */ - data->task.tk_release = nfs_commit_release; dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid); } @@ -1255,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head, * Commit dirty pages */ static int -nfs_commit_list(struct list_head *head, int how) +nfs_commit_list(struct inode *inode, struct list_head *head, int how) { struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(); + data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1283,10 +1284,9 @@ nfs_commit_list(struct list_head *head, int how) /* * COMMIT call returned */ -void -nfs_commit_done(struct rpc_task *task) +void nfs_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_write_data *data = calldata; struct nfs_page *req; int res = 0; @@ -1366,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(&head, how); + error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } @@ -1377,22 +1377,23 @@ int nfs_commit_inode(struct inode *inode, int how) int nfs_sync_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) { - int error, - wait; + int nocommit = how & FLUSH_NOCOMMIT; + int wait = how & FLUSH_WAIT; + int error; - wait = how & FLUSH_WAIT; - how &= ~FLUSH_WAIT; + how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); do { - error = 0; - if (wait) + if (wait) { error = nfs_wait_on_requests(inode, idx_start, npages); - if (error == 0) - error = nfs_flush_inode(inode, idx_start, npages, how); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (error == 0) + if (error != 0) + continue; + } + error = nfs_flush_inode(inode, idx_start, npages, how); + if (error != 0) + continue; + if (!nocommit) error = nfs_commit_inode(inode, how); -#endif } while (error > 0); return error; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 041380fe667b..6d2dfed1de08 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -56,13 +56,20 @@ static int nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int nfserr; + int err, nfserr; dprintk("nfsd: GETATTR(3) %s\n", - SVCFH_fmt(&argp->fh)); + SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + if (nfserr) + RETURN_STATUS(nfserr); + + err = vfs_getattr(resp->fh.fh_export->ex_mnt, + resp->fh.fh_dentry, &resp->stat); + nfserr = nfserrno(err); + RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9147b8524d05..243d94b9653a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -154,37 +154,34 @@ decode_sattr3(u32 *p, struct iattr *iap) } static inline u32 * -encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, + struct kstat *stat) { - struct vfsmount *mnt = fhp->fh_export->ex_mnt; struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; struct timespec time; - vfs_getattr(mnt, dentry, &stat); - - *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]); - *p++ = htonl((u32) stat.mode); - *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); - if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { - p = xdr_encode_hyper(p, (u64) stat.size); + p = xdr_encode_hyper(p, (u64) stat->size); } - p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9); - *p++ = htonl((u32) MAJOR(stat.rdev)); - *p++ = htonl((u32) MINOR(stat.rdev)); + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + *p++ = htonl((u32) MAJOR(stat->rdev)); + *p++ = htonl((u32) MINOR(stat->rdev)); if (is_fsid(fhp, rqstp->rq_reffh)) p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); else - p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev)); - p = xdr_encode_hyper(p, (u64) stat.ino); - p = encode_time3(p, &stat.atime); + p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev)); + p = xdr_encode_hyper(p, (u64) stat->ino); + p = encode_time3(p, &stat->atime); lease_get_mtime(dentry->d_inode, &time); p = encode_time3(p, &time); - p = encode_time3(p, &stat.ctime); + p = encode_time3(p, &stat->ctime); return p; } @@ -232,8 +229,14 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode != NULL) { - *p++ = xdr_one; /* attributes follow */ - return encode_fattr3(rqstp, p, fhp); + int err; + struct kstat stat; + + err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat); + if (!err) { + *p++ = xdr_one; /* attributes follow */ + return encode_fattr3(rqstp, p, fhp, &stat); + } } *p++ = xdr_zero; return p; @@ -616,7 +619,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, struct nfsd3_attrstat *resp) { if (resp->status == 0) - p = encode_fattr3(rqstp, p, &resp->fh); + p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 583c0710e45e..d828662d737d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -53,7 +53,7 @@ #define NFSPROC4_CB_COMPOUND 1 /* declarations */ -static void nfs4_cb_null(struct rpc_task *task); +static const struct rpc_call_ops nfs4_cb_null_ops; /* Index of predefined Linux callback client operations */ @@ -431,7 +431,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) } clnt->cl_intr = 0; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; /* Kick rpciod, put the call on the wire. */ @@ -447,7 +446,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) goto out_rpciod; - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { @@ -469,7 +468,7 @@ out_err: } static void -nfs4_cb_null(struct rpc_task *task) +nfs4_cb_null(struct rpc_task *task, void *dummy) { struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; struct nfs4_callback *cb = &clp->cl_callback; @@ -488,6 +487,10 @@ out: put_nfs4_client(clp); } +static const struct rpc_call_ops nfs4_cb_null_ops = { + .rpc_call_done = nfs4_cb_null, +}; + /* * called with dp->dl_count inc'ed. * nfs4_lock_state() may or may not have been called. diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 954cf893d50c..be963a133aaa 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -121,9 +121,9 @@ out: static void nfsd4_sync_rec_dir(void) { - down(&rec_dir.dentry->d_inode->i_sem); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); nfsd_sync_dir(rec_dir.dentry); - up(&rec_dir.dentry->d_inode->i_sem); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); } int @@ -143,7 +143,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) nfs4_save_user(&uid, &gid); /* lock the parent */ - down(&rec_dir.dentry->d_inode->i_sem); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); if (IS_ERR(dentry)) { @@ -159,7 +159,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) out_put: dput(dentry); out_unlock: - up(&rec_dir.dentry->d_inode->i_sem); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (status == 0) { clp->cl_firststate = 1; nfsd4_sync_rec_dir(); @@ -259,9 +259,9 @@ nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) printk("nfsd4: non-file found in client recovery directory\n"); return -EINVAL; } - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); status = vfs_unlink(dir->d_inode, dentry); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -274,9 +274,9 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) * any regular files anyway, just in case the directory was created by * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); status = vfs_rmdir(dir->d_inode, dentry); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -288,9 +288,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - down(&rec_dir.dentry->d_inode->i_sem); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); dentry = lookup_one_len(name, rec_dir.dentry, namlen); - up(&rec_dir.dentry->d_inode->i_sem); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); return status; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b45999ff33e6..aa7bb41b293d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -152,46 +152,44 @@ decode_sattr(u32 *p, struct iattr *iap) } static inline u32 * -encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, + struct kstat *stat) { - struct vfsmount *mnt = fhp->fh_export->ex_mnt; struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; int type; struct timespec time; - vfs_getattr(mnt, dentry, &stat); - type = (stat.mode & S_IFMT); + type = (stat->mode & S_IFMT); *p++ = htonl(nfs_ftypes[type >> 12]); - *p++ = htonl((u32) stat.mode); - *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); - if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); } else { - *p++ = htonl((u32) stat.size); + *p++ = htonl((u32) stat->size); } - *p++ = htonl((u32) stat.blksize); + *p++ = htonl((u32) stat->blksize); if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = htonl(new_encode_dev(stat.rdev)); + *p++ = htonl(new_encode_dev(stat->rdev)); else *p++ = htonl(0xffffffff); - *p++ = htonl((u32) stat.blocks); + *p++ = htonl((u32) stat->blocks); if (is_fsid(fhp, rqstp->rq_reffh)) *p++ = htonl((u32) fhp->fh_export->ex_fsid); else - *p++ = htonl(new_encode_dev(stat.dev)); - *p++ = htonl((u32) stat.ino); - *p++ = htonl((u32) stat.atime.tv_sec); - *p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0); + *p++ = htonl(new_encode_dev(stat->dev)); + *p++ = htonl((u32) stat->ino); + *p++ = htonl((u32) stat->atime.tv_sec); + *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); lease_get_mtime(dentry->d_inode, &time); *p++ = htonl((u32) time.tv_sec); *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); - *p++ = htonl((u32) stat.ctime.tv_sec); - *p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0); + *p++ = htonl((u32) stat->ctime.tv_sec); + *p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0); return p; } @@ -199,7 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) /* Helper function for NFSv2 ACL code */ u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) { - return encode_fattr(rqstp, p, fhp); + struct kstat stat; + vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat); + return encode_fattr(rqstp, p, fhp, &stat); } /* @@ -394,7 +394,7 @@ int nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, struct nfsd_attrstat *resp) { - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -403,7 +403,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, struct nfsd_diropres *resp) { p = encode_fh(p, &resp->fh); - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -428,7 +428,7 @@ int nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, struct nfsd_readres *resp) { - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->count); xdr_ressize_check(rqstp, p); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index af7c3c3074b0..eef0576a7785 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -48,8 +48,8 @@ #include <linux/fsnotify.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> -#ifdef CONFIG_NFSD_V4 #include <linux/xattr.h> +#ifdef CONFIG_NFSD_V4 #include <linux/nfs4.h> #include <linux/nfs4_acl.h> #include <linux/nfsd_idmap.h> @@ -365,8 +365,30 @@ out_nfserr: goto out; } -#if defined(CONFIG_NFSD_V4) +#if defined(CONFIG_NFSD_V2_ACL) || \ + defined(CONFIG_NFSD_V3_ACL) || \ + defined(CONFIG_NFSD_V4) +static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) +{ + ssize_t buflen; + int error; + + buflen = vfs_getxattr(dentry, key, NULL, 0); + if (buflen <= 0) + return buflen; + + *buf = kmalloc(buflen, GFP_KERNEL); + if (!*buf) + return -ENOMEM; + error = vfs_getxattr(dentry, key, *buf, buflen); + if (error < 0) + return error; + return buflen; +} +#endif + +#if defined(CONFIG_NFSD_V4) static int set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) { @@ -374,7 +396,6 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) size_t buflen; char *buf = NULL; int error = 0; - struct inode *inode = dentry->d_inode; buflen = posix_acl_xattr_size(pacl->a_count); buf = kmalloc(buflen, GFP_KERNEL); @@ -388,15 +409,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) goto out; } - error = -EOPNOTSUPP; - if (inode->i_op && inode->i_op->setxattr) { - down(&inode->i_sem); - security_inode_setxattr(dentry, key, buf, len, 0); - error = inode->i_op->setxattr(dentry, key, buf, len, 0); - if (!error) - security_inode_post_setxattr(dentry, key, buf, len, 0); - up(&inode->i_sem); - } + error = vfs_setxattr(dentry, key, buf, len, 0); out: kfree(buf); return error; @@ -455,44 +468,19 @@ out_nfserr: static struct posix_acl * _get_posix_acl(struct dentry *dentry, char *key) { - struct inode *inode = dentry->d_inode; - char *buf = NULL; - int buflen, error = 0; + void *buf = NULL; struct posix_acl *pacl = NULL; + int buflen; - error = -EOPNOTSUPP; - if (inode->i_op == NULL) - goto out_err; - if (inode->i_op->getxattr == NULL) - goto out_err; - - error = security_inode_getxattr(dentry, key); - if (error) - goto out_err; - - buflen = inode->i_op->getxattr(dentry, key, NULL, 0); - if (buflen <= 0) { - error = buflen < 0 ? buflen : -ENODATA; - goto out_err; - } - - buf = kmalloc(buflen, GFP_KERNEL); - if (buf == NULL) { - error = -ENOMEM; - goto out_err; - } - - error = inode->i_op->getxattr(dentry, key, buf, buflen); - if (error < 0) - goto out_err; + buflen = nfsd_getxattr(dentry, key, &buf); + if (!buflen) + buflen = -ENODATA; + if (buflen <= 0) + return ERR_PTR(buflen); pacl = posix_acl_from_xattr(buf, buflen); - out: kfree(buf); return pacl; - out_err: - pacl = ERR_PTR(error); - goto out; } int @@ -717,27 +705,33 @@ nfsd_close(struct file *filp) * As this calls fsync (not fdatasync) there is no need for a write_inode * after it. */ -static inline void nfsd_dosync(struct file *filp, struct dentry *dp, - struct file_operations *fop) +static inline int nfsd_dosync(struct file *filp, struct dentry *dp, + struct file_operations *fop) { struct inode *inode = dp->d_inode; int (*fsync) (struct file *, struct dentry *, int); + int err = nfs_ok; filemap_fdatawrite(inode->i_mapping); if (fop && (fsync = fop->fsync)) - fsync(filp, dp, 0); + err=fsync(filp, dp, 0); filemap_fdatawait(inode->i_mapping); + + return nfserrno(err); } -static void +static int nfsd_sync(struct file *filp) { + int err; struct inode *inode = filp->f_dentry->d_inode; dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); - down(&inode->i_sem); - nfsd_dosync(filp, filp->f_dentry, filp->f_op); - up(&inode->i_sem); + mutex_lock(&inode->i_mutex); + err=nfsd_dosync(filp, filp->f_dentry, filp->f_op); + mutex_unlock(&inode->i_mutex); + + return err; } void @@ -874,6 +868,16 @@ out: return err; } +static void kill_suid(struct dentry *dentry) +{ + struct iattr ia; + ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; + + mutex_lock(&dentry->d_inode->i_mutex); + notify_change(dentry, &ia); + mutex_unlock(&dentry->d_inode->i_mutex); +} + static inline int nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, @@ -927,14 +931,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, } /* clear setuid/setgid flag after write */ - if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) { - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; - - down(&inode->i_sem); - notify_change(dentry, &ia); - up(&inode->i_sem); - } + if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + kill_suid(dentry); if (err >= 0 && stable) { static ino_t last_ino; @@ -962,7 +960,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (inode->i_state & I_DIRTY) { dprintk("nfsd: write sync %d\n", current->pid); - nfsd_sync(file); + err=nfsd_sync(file); } #if 0 wake_up(&inode->i_wait); @@ -1066,7 +1064,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; if (EX_ISSYNC(fhp->fh_export)) { if (file->f_op && file->f_op->fsync) { - nfsd_sync(file); + err = nfsd_sync(file); } else { err = nfserr_notsupp; } @@ -1874,39 +1872,25 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type) ssize_t size; struct posix_acl *acl; - if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) + if (!IS_POSIXACL(inode)) + return ERR_PTR(-EOPNOTSUPP); + + switch (type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + default: return ERR_PTR(-EOPNOTSUPP); - switch(type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return ERR_PTR(-EOPNOTSUPP); } - size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); + size = nfsd_getxattr(fhp->fh_dentry, name, &value); + if (size < 0) + return ERR_PTR(size); - if (size < 0) { - acl = ERR_PTR(size); - goto getout; - } else if (size > 0) { - value = kmalloc(size, GFP_KERNEL); - if (!value) { - acl = ERR_PTR(-ENOMEM); - goto getout; - } - size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size); - if (size < 0) { - acl = ERR_PTR(size); - goto getout; - } - } acl = posix_acl_from_xattr(value, size); - -getout: kfree(value); return acl; } @@ -1947,16 +1931,13 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) } else size = 0; - if (!fhp->fh_locked) - fh_lock(fhp); /* unlocking is done automatically */ if (size) - error = inode->i_op->setxattr(fhp->fh_dentry, name, - value, size, 0); + error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); else { if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) error = 0; else { - error = inode->i_op->removexattr(fhp->fh_dentry, name); + error = vfs_removexattr(fhp->fh_dentry, name); if (error == -ENODATA) error = 0; } diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index eda056bac256..9480a0526cd3 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1532,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, * NOTE to self: No changes in the attribute list are required to move from * a resident to a non-resident attribute. * - * Locking: - The caller must hold i_sem on the inode. + * Locking: - The caller must hold i_mutex on the inode. */ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) { @@ -1728,7 +1728,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) /* * This needs to be last since the address space operations ->readpage * and ->writepage can run concurrently with us as they are not - * serialized on i_sem. Note, we are not allowed to fail once we flip + * serialized on i_mutex. Note, we are not allowed to fail once we flip * this switch, which is another reason to do this last. */ NInoSetNonResident(ni); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 795c3d1930f5..b0690d4c8906 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), * work but we don't care for how quickly one can access them. This also fixes * the dcache aliasing issues. * - * Locking: - Caller must hold i_sem on the directory. + * Locking: - Caller must hold i_mutex on the directory. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -1085,11 +1085,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, * While this will return the names in random order this doesn't matter for * ->readdir but OTOH results in a faster ->readdir. * - * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS + * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS * parts (e.g. ->f_pos and ->i_size, and it also protects against directory * modifications). * - * Locking: - Caller must hold i_sem on the directory. + * Locking: - Caller must hold i_mutex on the directory. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -1520,7 +1520,7 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) * Note: In the past @filp could be NULL so we ignore it as we don't need it * anyway. * - * Locking: Caller must hold i_sem on the inode. + * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 727533891813..fb413d3d8618 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -106,7 +106,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) * this is the case, the necessary zeroing will also have happened and that all * metadata is self-consistent. * - * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be + * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be * held by the caller. */ static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, @@ -473,7 +473,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) * @bytes: number of bytes to be written * * This is called for non-resident attributes from ntfs_file_buffered_write() - * with i_sem held on the inode (@pages[0]->mapping->host). There are + * with i_mutex held on the inode (@pages[0]->mapping->host). There are * @nr_pages pages in @pages which are locked but not kmap()ped. The source * data has not yet been copied into the @pages. * @@ -1637,7 +1637,7 @@ err_out: * @pos: byte position in file at which the write begins * @bytes: number of bytes to be written * - * This is called from ntfs_file_buffered_write() with i_sem held on the inode + * This is called from ntfs_file_buffered_write() with i_mutex held on the inode * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are * locked but not kmap()ped. The source data has already been copied into the * @page. ntfs_prepare_pages_for_non_resident_write() has been called before @@ -1814,7 +1814,7 @@ err_out: /** * ntfs_file_buffered_write - * - * Locking: The vfs is holding ->i_sem on the inode. + * Locking: The vfs is holding ->i_mutex on the inode. */ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, @@ -2173,7 +2173,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = remove_suid(file->f_dentry); if (err) goto out; - inode_update_time(inode, 1); + file_update_time(file); written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); out: @@ -2196,9 +2196,9 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf, BUG_ON(iocb->ki_pos != pos); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, pos, ret); if (err < 0) @@ -2221,12 +2221,12 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, struct kiocb kiocb; ssize_t ret; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); init_sync_kiocb(&kiocb, file); ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(&kiocb); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, *ppos - ret, ret); if (err < 0) @@ -2269,7 +2269,7 @@ static ssize_t ntfs_file_write(struct file *file, const char __user *buf, * Note: In the past @filp could be NULL so we ignore it as we don't need it * anyway. * - * Locking: Caller must hold i_sem on the inode. + * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 8f2d5727546f..9f5427c2d105 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -32,7 +32,7 @@ * Allocate a new index context, initialize it with @idx_ni and return it. * Return NULL if allocation failed. * - * Locking: Caller must hold i_sem on the index inode. + * Locking: Caller must hold i_mutex on the index inode. */ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) { @@ -50,7 +50,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) * * Release the index context @ictx, releasing all associated resources. * - * Locking: Caller must hold i_sem on the index inode. + * Locking: Caller must hold i_mutex on the index inode. */ void ntfs_index_ctx_put(ntfs_index_context *ictx) { @@ -106,7 +106,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx) * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to * ensure that the changes are written to disk. * - * Locking: - Caller must hold i_sem on the index inode. + * Locking: - Caller must hold i_mutex on the index inode. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index b24f4c4b2c5c..ea1bd3feea1b 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2125,13 +2125,13 @@ void ntfs_put_inode(struct inode *vi) ntfs_inode *ni = NTFS_I(vi); if (NInoIndexAllocPresent(ni)) { struct inode *bvi = NULL; - down(&vi->i_sem); + mutex_lock(&vi->i_mutex); if (atomic_read(&vi->i_count) == 2) { bvi = ni->itype.index.bmp_ino; if (bvi) ni->itype.index.bmp_ino = NULL; } - up(&vi->i_sem); + mutex_unlock(&vi->i_mutex); if (bvi) iput(bvi); } @@ -2311,7 +2311,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run " * * Returns 0 on success or -errno on error. * - * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for + * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for * writing. The only case in the kernel where ->i_alloc_sem is not held is * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called * with the current i_size as the offset. The analogous place in NTFS is in @@ -2767,7 +2767,25 @@ unm_done: up_write(&ni->runlist.lock); done: /* Update the mtime and ctime on the base inode. */ - inode_update_time(VFS_I(base_ni), 1); + /* normally ->truncate shouldn't update ctime or mtime, + * but ntfs did before so it got a copy & paste version + * of file_update_time. one day someone should fix this + * for real. + */ + if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) { + struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb); + int sync_it = 0; + + if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) || + !timespec_equal(&VFS_I(base_ni)->i_ctime, &now)) + sync_it = 1; + VFS_I(base_ni)->i_mtime = now; + VFS_I(base_ni)->i_ctime = now; + + if (sync_it) + mark_inode_dirty_sync(VFS_I(base_ni)); + } + if (likely(!err)) { NInoClearTruncateFailed(ni); ntfs_debug("Done."); @@ -2831,7 +2849,7 @@ void ntfs_truncate_vfs(struct inode *vi) { * We also abort all changes of user, group, and mode as we do not implement * the NTFS ACLs yet. * - * Called with ->i_sem held. For the ATTR_SIZE (i.e. ->truncate) case, also + * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also * called with ->i_alloc_sem held for writing. * * Basically this is a copy of generic notify_change() and inode_setattr() diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 351dbc3b6e40..5ea9eb93af62 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -96,7 +96,7 @@ * name. We then convert the name to the current NLS code page, and proceed * searching for a dentry with this name, etc, as in case 2), above. * - * Locking: Caller must hold i_sem on the directory. + * Locking: Caller must hold i_mutex on the directory. */ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, struct nameidata *nd) @@ -254,7 +254,7 @@ handle_name: nls_name.hash = full_name_hash(nls_name.name, nls_name.len); /* - * Note: No need for dent->d_lock lock as i_sem is held on the + * Note: No need for dent->d_lock lock as i_mutex is held on the * parent inode. */ @@ -374,7 +374,7 @@ struct inode_operations ntfs_dir_inode_ops = { * The code is based on the ext3 ->get_parent() implementation found in * fs/ext3/namei.c::ext3_get_parent(). * - * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down. + * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down. * * Return the dentry of the parent directory on success or the error code on * error (IS_ERR() is true). diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c index 833df2a4e9fb..d0ef4182147b 100644 --- a/fs/ntfs/quota.c +++ b/fs/ntfs/quota.c @@ -48,7 +48,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) ntfs_error(vol->sb, "Quota inodes are not open."); return FALSE; } - down(&vol->quota_q_ino->i_sem); + mutex_lock(&vol->quota_q_ino->i_mutex); ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino)); if (!ictx) { ntfs_error(vol->sb, "Failed to get index context."); @@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) ntfs_index_entry_mark_dirty(ictx); set_done: ntfs_index_ctx_put(ictx); - up(&vol->quota_q_ino->i_sem); + mutex_unlock(&vol->quota_q_ino->i_mutex); /* * We set the flag so we do not try to mark the quotas out of date * again on remount. @@ -110,7 +110,7 @@ done: err_out: if (ictx) ntfs_index_ctx_put(ictx); - up(&vol->quota_q_ino->i_sem); + mutex_unlock(&vol->quota_q_ino->i_mutex); return FALSE; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6c16db9e1a8a..c3a3f1a8310b 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -443,8 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_debug("Entering with remount options string: %s", opt); #ifndef NTFS_RW - /* For read-only compiled driver, enforce all read-only flags. */ - *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + /* For read-only compiled driver, enforce read-only flag. */ + *flags |= MS_RDONLY; #else /* NTFS_RW */ /* * For the read-write compiled driver, if we are remounting read-write, @@ -1213,10 +1213,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol) * Find the inode number for the hibernation file by looking up the * filename hiberfil.sys in the root directory. */ - down(&vol->root_ino->i_sem); + mutex_lock(&vol->root_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12, &name); - up(&vol->root_ino->i_sem); + mutex_unlock(&vol->root_ino->i_mutex); if (IS_ERR_MREF(mref)) { ret = MREF_ERR(mref); /* If the file does not exist, Windows is not hibernated. */ @@ -1307,10 +1307,10 @@ static BOOL load_and_init_quota(ntfs_volume *vol) * Find the inode number for the quota file by looking up the filename * $Quota in the extended system files directory $Extend. */ - down(&vol->extend_ino->i_sem); + mutex_lock(&vol->extend_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6, &name); - up(&vol->extend_ino->i_sem); + mutex_unlock(&vol->extend_ino->i_mutex); if (IS_ERR_MREF(mref)) { /* * If the file does not exist, quotas are disabled and have @@ -1390,10 +1390,10 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol) * Find the inode number for the transaction log file by looking up the * filename $UsnJrnl in the extended system files directory $Extend. */ - down(&vol->extend_ino->i_sem); + mutex_lock(&vol->extend_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8, &name); - up(&vol->extend_ino->i_sem); + mutex_unlock(&vol->extend_ino->i_mutex); if (IS_ERR_MREF(mref)) { /* * If the file does not exist, transaction logging is disabled, @@ -1721,7 +1721,7 @@ static BOOL load_system_files(ntfs_volume *vol) es3); goto iput_mirr_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", !vol->mftmirr_ino ? es1 : es2, es3); } else @@ -1837,7 +1837,7 @@ get_ctx_vol_failed: es1, es2); goto iput_vol_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1874,7 +1874,7 @@ get_ctx_vol_failed: } goto iput_logfile_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1919,7 +1919,7 @@ get_ctx_vol_failed: es1, es2); goto iput_root_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1943,7 +1943,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; /* * Do not set NVolErrors() because ntfs_remount() might manage * to set the dirty flag in which case all would be well. @@ -1970,7 +1970,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } #endif @@ -1989,7 +1989,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ @@ -2030,7 +2030,7 @@ get_ctx_vol_failed: es1, es2); goto iput_quota_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2053,7 +2053,7 @@ get_ctx_vol_failed: goto iput_quota_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } /* @@ -2074,7 +2074,7 @@ get_ctx_vol_failed: es1, es2); goto iput_usnjrnl_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2097,7 +2097,7 @@ get_ctx_vol_failed: goto iput_usnjrnl_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ @@ -2312,9 +2312,9 @@ static void ntfs_put_super(struct super_block *sb) if (!list_empty(&sb->s_dirty)) { const char *s1, *s2; - down(&vol->mft_ino->i_sem); + mutex_lock(&vol->mft_ino->i_mutex); truncate_inode_pages(vol->mft_ino->i_mapping, 0); - up(&vol->mft_ino->i_sem); + mutex_unlock(&vol->mft_ino->i_mutex); write_inode_now(vol->mft_ino, 1); if (!list_empty(&sb->s_dirty)) { static const char *_s1 = "inodes"; @@ -2689,7 +2689,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_debug("Entering."); #ifndef NTFS_RW - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; #endif /* ! NTFS_RW */ /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 465f797451ee..6b9812db3779 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -966,7 +966,7 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb, mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk, num_clusters); - BUG_ON(!down_trylock(&tl_inode->i_sem)); + BUG_ON(mutex_trylock(&tl_inode->i_mutex)); start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); @@ -1108,7 +1108,7 @@ bail: return status; } -/* Expects you to already be holding tl_inode->i_sem */ +/* Expects you to already be holding tl_inode->i_mutex */ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) { int status; @@ -1123,7 +1123,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) mlog_entry_void(); - BUG_ON(!down_trylock(&tl_inode->i_sem)); + BUG_ON(mutex_trylock(&tl_inode->i_mutex)); di = (struct ocfs2_dinode *) tl_bh->b_data; tl = &di->id2.i_dealloc; @@ -1198,9 +1198,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb) int status; struct inode *tl_inode = osb->osb_tl_inode; - down(&tl_inode->i_sem); + mutex_lock(&tl_inode->i_mutex); status = __ocfs2_flush_truncate_log(osb); - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); return status; } @@ -1363,7 +1363,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs, tl_copy->i_blkno); - down(&tl_inode->i_sem); + mutex_lock(&tl_inode->i_mutex); for(i = 0; i < num_recs; i++) { if (ocfs2_truncate_log_needs_flush(osb)) { status = __ocfs2_flush_truncate_log(osb); @@ -1395,7 +1395,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, } bail_up: - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); mlog_exit(status); return status; @@ -1840,7 +1840,7 @@ start: mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del); - down(&tl_inode->i_sem); + mutex_lock(&tl_inode->i_mutex); tl_sem = 1; /* ocfs2_truncate_log_needs_flush guarantees us at least one * record is free for use. If there isn't any, we flush to get @@ -1875,7 +1875,7 @@ start: goto bail; } - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); tl_sem = 0; ocfs2_commit_trans(handle); @@ -1890,7 +1890,7 @@ bail: ocfs2_schedule_truncate_log_flush(osb, 1); if (tl_sem) - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); if (handle) ocfs2_commit_trans(handle); @@ -1994,7 +1994,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, goto bail; } - down(&ext_alloc_inode->i_sem); + mutex_lock(&ext_alloc_inode->i_mutex); (*tc)->tc_ext_alloc_inode = ext_alloc_inode; status = ocfs2_meta_lock(ext_alloc_inode, @@ -2026,7 +2026,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) if (tc->tc_ext_alloc_locked) ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); - up(&tc->tc_ext_alloc_inode->i_sem); + mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); iput(tc->tc_ext_alloc_inode); } diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index f5ef5ea61a05..e8c56a3d9c64 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; mlog(ML_ENTRY, "ENTRY:\n"); \ } while (0) -/* We disable this for old compilers since they don't have support for - * __builtin_types_compatible_p. +/* + * We disable this for sparse. */ -#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \ - !defined(__CHECKER__) +#if !defined(__CHECKER__) #define mlog_exit(st) do { \ if (__builtin_types_compatible_p(typeof(st), unsigned long)) \ mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \ diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 5fd60c105913..cf7828f23361 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -653,7 +653,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g struct config_group *o2hb_group = NULL, *ret = NULL; void *defs = NULL; - /* this runs under the parent dir's i_sem; there can be only + /* this runs under the parent dir's i_mutex; there can be only * one caller in here at a time */ if (o2nm_single_cluster) goto out; /* ENOSPC */ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 856e20ae8263..57158fa75d91 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -202,7 +202,7 @@ bail: } /* - * NOTE: this should always be called with parent dir i_sem taken. + * NOTE: this should always be called with parent dir i_mutex taken. */ int ocfs2_find_files_on_disk(const char *name, int namelen, @@ -245,7 +245,7 @@ leave: * Return 0 if the name does not exist * Return -EEXIST if the directory contains the name * - * Callers should have i_sem + a cluster lock on dir + * Callers should have i_mutex + a cluster lock on dir */ int ocfs2_check_dir_for_entry(struct inode *dir, const char *name, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 72ae9e3306f4..ca5f9f90d794 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -492,7 +492,7 @@ restart_all: } /* blocks peope in read/write from reading our allocation - * until we're done changing it. We depend on i_sem to block + * until we're done changing it. We depend on i_mutex to block * other extend/truncate calls while we're here. Ordering wrt * start_trans is important here -- always do it before! */ down_write(&OCFS2_I(inode)->ip_alloc_sem); @@ -958,8 +958,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, filp->f_flags &= ~O_DIRECT; #endif - down(&inode->i_sem); - /* to match setattr's i_sem -> i_alloc_sem -> rw_lock ordering */ + mutex_lock(&inode->i_mutex); + /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ if (filp->f_flags & O_DIRECT) { have_alloc_sem = 1; down_read(&inode->i_alloc_sem); @@ -1123,7 +1123,7 @@ out: up_read(&inode->i_alloc_sem); if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); mlog_exit(ret); return ret; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index a91ba4dec936..d4ecc0627716 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -485,10 +485,10 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail; } - down(&inode_alloc_inode->i_sem); + mutex_lock(&inode_alloc_inode->i_mutex); status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1); if (status < 0) { - up(&inode_alloc_inode->i_sem); + mutex_unlock(&inode_alloc_inode->i_mutex); mlog_errno(status); goto bail; @@ -536,7 +536,7 @@ bail_commit: ocfs2_commit_trans(handle); bail_unlock: ocfs2_meta_unlock(inode_alloc_inode, 1); - up(&inode_alloc_inode->i_sem); + mutex_unlock(&inode_alloc_inode->i_mutex); brelse(inode_alloc_bh); bail: iput(inode_alloc_inode); @@ -567,10 +567,10 @@ static int ocfs2_wipe_inode(struct inode *inode, /* Lock the orphan dir. The lock will be held for the entire * delete_inode operation. We do this now to avoid races with * recovery completion on other nodes. */ - down(&orphan_dir_inode->i_sem); + mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1); if (status < 0) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); mlog_errno(status); goto bail; @@ -593,7 +593,7 @@ static int ocfs2_wipe_inode(struct inode *inode, bail_unlock_dir: ocfs2_meta_unlock(orphan_dir_inode, 1); - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); brelse(orphan_dir_bh); bail: iput(orphan_dir_inode); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 04428042e5e5..303c8d96457f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -216,7 +216,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle, atomic_inc(&inode->i_count); /* we're obviously changing it... */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); /* sanity check */ BUG_ON(OCFS2_I(inode)->ip_handle); @@ -241,7 +241,7 @@ static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) OCFS2_I(inode)->ip_handle = NULL; list_del_init(&OCFS2_I(inode)->ip_handle_list); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); iput(inode); } } @@ -1433,10 +1433,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, goto out; } - down(&orphan_dir_inode->i_sem); + mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); if (status < 0) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); mlog_errno(status); goto out; } @@ -1451,7 +1451,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, if (!bh) status = -EINVAL; if (status < 0) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); if (bh) brelse(bh); mlog_errno(status); @@ -1465,7 +1465,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, if (!ocfs2_check_dir_entry(orphan_dir_inode, de, bh, local)) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); status = -EINVAL; mlog_errno(status); brelse(bh); @@ -1509,7 +1509,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, } brelse(bh); } - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); ocfs2_meta_unlock(orphan_dir_inode, 0); have_disk_lock = 0; diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index fe373a2101d9..149b35181666 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -334,7 +334,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, goto bail; } - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &alloc_bh, 0, inode); @@ -367,7 +367,7 @@ bail: brelse(alloc_bh); if (inode) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); iput(inode); } @@ -446,7 +446,7 @@ bail: /* * make sure we've got at least bitswanted contiguous bits in the - * local alloc. You lose them when you drop i_sem. + * local alloc. You lose them when you drop i_mutex. * * We will add ourselves to the transaction passed in, but may start * our own in order to shift windows. diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index afdeec4b0eef..843cf9ddefe8 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -80,12 +80,8 @@ static struct vm_operations_struct ocfs2_file_vm_ops = { .nopage = ocfs2_nopage, }; -int ocfs2_mmap(struct file *file, - struct vm_area_struct *vma) +int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) { - struct address_space *mapping = file->f_dentry->d_inode->i_mapping; - struct inode *inode = mapping->host; - /* We don't want to support shared writable mappings yet. */ if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { @@ -95,7 +91,7 @@ int ocfs2_mmap(struct file *file, return -EINVAL; } - update_atime(inode); + file_accessed(file); vma->vm_ops = &ocfs2_file_vm_ops; return 0; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 48bf7f0ce544..364d64bd5f10 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -169,7 +169,7 @@ static match_table_t tokens = { */ static void ocfs2_write_super(struct super_block *sb) { - if (down_trylock(&sb->s_lock) == 0) + if (mutex_trylock(&sb->s_lock) != 0) BUG(); sb->s_dirt = 0; } diff --git a/fs/open.c b/fs/open.c index f53a5b9ffb7d..a3b3a9b5c2ff 100644 --- a/fs/open.c +++ b/fs/open.c @@ -194,7 +194,8 @@ out: return error; } -int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) { int err; struct iattr newattrs; @@ -204,19 +205,19 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) return -EINVAL; newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { newattrs.ia_file = filp; newattrs.ia_valid |= ATTR_FILE; } - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); err = notify_change(dentry, &newattrs); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return err; } -static inline long do_sys_truncate(const char __user * path, loff_t length) +static long do_sys_truncate(const char __user * path, loff_t length) { struct nameidata nd; struct inode * inode; @@ -266,7 +267,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.dentry, length, NULL); + error = do_truncate(nd.dentry, length, 0, NULL); } put_write_access(inode); @@ -282,7 +283,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length) return do_sys_truncate(path, (long)length); } -static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) +static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode * inode; struct dentry *dentry; @@ -318,7 +319,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) - error = do_truncate(dentry, length, file); + error = do_truncate(dentry, length, 0, file); out_putf: fput(file); out: @@ -397,9 +398,9 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); dput_and_out: path_release(&nd); out: @@ -450,9 +451,9 @@ long do_utimes(char __user * filename, struct timeval * times) (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); dput_and_out: path_release(&nd); out: @@ -619,13 +620,13 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_putf; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out_putf: fput(file); @@ -653,13 +654,13 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode) if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(nd.dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); dput_and_out: path_release(&nd); @@ -695,9 +696,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); error = notify_change(dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out: return error; } @@ -970,7 +971,7 @@ out: EXPORT_SYMBOL(get_unused_fd); -static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) +static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __FD_CLR(fd, fdt->open_fds); diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 656bc43431b9..7490cc9208b3 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -21,26 +21,30 @@ config ACORN_PARTITION Support hard disks partitioned under Acorn operating systems. config ACORN_PARTITION_CUMANA - bool "Cumana partition support" if PARTITION_ADVANCED && ACORN_PARTITION + bool "Cumana partition support" if PARTITION_ADVANCED default y if ARCH_ACORN + depends on ACORN_PARTITION help Say Y here if you would like to use hard disks under Linux which were partitioned using the Cumana interface on Acorn machines. config ACORN_PARTITION_EESOX - bool "EESOX partition support" if PARTITION_ADVANCED && ACORN_PARTITION + bool "EESOX partition support" if PARTITION_ADVANCED default y if ARCH_ACORN + depends on ACORN_PARTITION config ACORN_PARTITION_ICS - bool "ICS partition support" if PARTITION_ADVANCED && ACORN_PARTITION + bool "ICS partition support" if PARTITION_ADVANCED default y if ARCH_ACORN + depends on ACORN_PARTITION help Say Y here if you would like to use hard disks under Linux which were partitioned using the ICS interface on Acorn machines. config ACORN_PARTITION_ADFS - bool "Native filecore partition support" if PARTITION_ADVANCED && ACORN_PARTITION + bool "Native filecore partition support" if PARTITION_ADVANCED default y if ARCH_ACORN + depends on ACORN_PARTITION help The Acorn Disc Filing System is the standard file system of the RiscOS operating system which runs on Acorn's ARM-based Risc PC @@ -48,15 +52,17 @@ config ACORN_PARTITION_ADFS `Y' here, Linux will support disk partitions created under ADFS. config ACORN_PARTITION_POWERTEC - bool "PowerTec partition support" if PARTITION_ADVANCED && ACORN_PARTITION + bool "PowerTec partition support" if PARTITION_ADVANCED default y if ARCH_ACORN + depends on ACORN_PARTITION help Support reading partition tables created on Acorn machines using the PowerTec SCSI drive. config ACORN_PARTITION_RISCIX - bool "RISCiX partition support" if PARTITION_ADVANCED && ACORN_PARTITION + bool "RISCiX partition support" if PARTITION_ADVANCED default y if ARCH_ACORN + depends on ACORN_PARTITION help Once upon a time, there was a native Unix port for the Acorn series of machines called RISCiX. If you say 'Y' here, Linux will be able @@ -85,7 +91,7 @@ config ATARI_PARTITION config IBM_PARTITION bool "IBM disk label and partition support" - depends on PARTITION_ADVANCED && ARCH_S390 + depends on PARTITION_ADVANCED && S390 help Say Y here if you would like to be able to read the hard disk partition table format used by IBM DASD disks operating under CMS. @@ -224,5 +230,3 @@ config EFI_PARTITION Say Y here if you would like to use hard disks under Linux which were partitioned using EFI GPT. Presently only useful on the IA-64 platform. - -# define_bool CONFIG_ACORN_PARTITION_CUMANA y diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 6327bcb2d73d..78010ad60e47 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -56,7 +56,10 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) struct hd_geometry *geo; char type[5] = {0,}; char name[7] = {0,}; - struct vtoc_volume_label *vlabel; + union label_t { + struct vtoc_volume_label vol; + struct vtoc_cms_label cms; + } *label; unsigned char *data; Sector sect; @@ -64,9 +67,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) goto out_noinfo; if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL) goto out_nogeo; - if ((vlabel = kmalloc(sizeof(struct vtoc_volume_label), - GFP_KERNEL)) == NULL) - goto out_novlab; + if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) + goto out_nolab; if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) @@ -87,7 +89,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) strncpy(name, data + 8, 6); else strncpy(name, data + 4, 6); - memcpy (vlabel, data, sizeof(struct vtoc_volume_label)); + memcpy(label, data, sizeof(union label_t)); put_dev_sector(sect); EBCASC(type, 4); @@ -100,14 +102,12 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) /* * VM style CMS1 labeled disk */ - int *label = (int *) vlabel; - - if (label[13] != 0) { + if (label->cms.disk_offset != 0) { printk("CMS1/%8s(MDSK):", name); /* disk is reserved minidisk */ - blocksize = label[3]; - offset = label[13]; - size = (label[7] - 1)*(blocksize >> 9); + blocksize = label->cms.block_size; + offset = label->cms.disk_offset; + size = (label->cms.block_count - 1) * (blocksize >> 9); } else { printk("CMS1/%8s:", name); offset = (info->label_block + 1); @@ -126,7 +126,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) printk("VOL1/%8s:", name); /* get block number and read then go through format1 labels */ - blk = cchhb2blk(&vlabel->vtoc, geo) + 1; + blk = cchhb2blk(&label->vol.vtoc, geo) + 1; counter = 0; while ((data = read_dev_sector(bdev, blk*(blocksize/512), §)) != NULL) { @@ -174,7 +174,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) } printk("\n"); - kfree(vlabel); + kfree(label); kfree(geo); kfree(info); return 1; @@ -182,8 +182,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) out_readerr: out_badsect: out_noioctl: - kfree(vlabel); -out_novlab: + kfree(label); +out_nolab: kfree(geo); out_nogeo: kfree(info); diff --git a/fs/pipe.c b/fs/pipe.c index 66aa0b938d6a..eef0f29e86ef 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -44,10 +44,10 @@ void pipe_wait(struct inode * inode) * is considered a noninteractive wait: */ prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); schedule(); finish_wait(PIPE_WAIT(*inode), &wait); - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); } static inline int @@ -136,7 +136,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, do_wakeup = 0; ret = 0; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); info = inode->i_pipe; for (;;) { int bufs = info->nrbufs; @@ -200,7 +200,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, } pipe_wait(inode); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); /* Signal writers asynchronously that there is more room. */ if (do_wakeup) { wake_up_interruptible(PIPE_WAIT(*inode)); @@ -237,7 +237,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov, do_wakeup = 0; ret = 0; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); info = inode->i_pipe; if (!PIPE_READERS(*inode)) { @@ -341,13 +341,13 @@ pipe_writev(struct file *filp, const struct iovec *_iov, PIPE_WAITING_WRITERS(*inode)--; } out: - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (do_wakeup) { wake_up_interruptible(PIPE_WAIT(*inode)); kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); } if (ret > 0) - inode_update_time(inode, 1); /* mtime and ctime */ + file_update_time(filp); return ret; } @@ -381,7 +381,7 @@ pipe_ioctl(struct inode *pino, struct file *filp, switch (cmd) { case FIONREAD: - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); info = inode->i_pipe; count = 0; buf = info->curbuf; @@ -390,7 +390,7 @@ pipe_ioctl(struct inode *pino, struct file *filp, count += info->bufs[buf].len; buf = (buf+1) & (PIPE_BUFFERS-1); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return put_user(count, (int __user *)arg); default: return -EINVAL; @@ -433,7 +433,7 @@ pipe_poll(struct file *filp, poll_table *wait) static int pipe_release(struct inode *inode, int decr, int decw) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_READERS(*inode) -= decr; PIPE_WRITERS(*inode) -= decw; if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { @@ -443,7 +443,7 @@ pipe_release(struct inode *inode, int decr, int decw) kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -454,9 +454,9 @@ pipe_read_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -471,9 +471,9 @@ pipe_write_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -488,14 +488,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); if (retval >= 0) retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -534,9 +534,9 @@ pipe_read_open(struct inode *inode, struct file *filp) { /* We could have perhaps used atomic_t, but this and friends below are the only places. So it doesn't seem worthwhile. */ - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_READERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -544,9 +544,9 @@ pipe_read_open(struct inode *inode, struct file *filp) static int pipe_write_open(struct inode *inode, struct file *filp) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_WRITERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -554,12 +554,12 @@ pipe_write_open(struct inode *inode, struct file *filp) static int pipe_rdwr_open(struct inode *inode, struct file *filp) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); if (filp->f_mode & FMODE_READ) PIPE_READERS(*inode)++; if (filp->f_mode & FMODE_WRITE) PIPE_WRITERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } diff --git a/fs/pnode.c b/fs/pnode.c index aeeec8ba8dd2..f1871f773f64 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m, struct vfsmount *next; struct vfsmount *master = m->mnt_master; - if ( master == origin->mnt_master ) { + if (master == origin->mnt_master) { next = next_peer(m); return ((next == origin) ? NULL : next); } else if (m->mnt_slave.next != &master->mnt_slave_list) diff --git a/fs/proc/array.c b/fs/proc/array.c index 3e1239e4b303..7eb1bd7f800c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -308,7 +308,7 @@ int proc_pid_status(struct task_struct *task, char * buffer) buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); -#if defined(CONFIG_ARCH_S390) +#if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif return buffer - orig; @@ -330,7 +330,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) unsigned long min_flt = 0, maj_flt = 0; cputime_t cutime, cstime, utime, stime; unsigned long rsslim = 0; - unsigned long it_real_value = 0; + DEFINE_KTIME(it_real_value); struct task_struct *t; char tcomm[sizeof(task->comm)]; @@ -386,7 +386,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) utime = cputime_add(utime, task->signal->utime); stime = cputime_add(stime, task->signal->stime); } - it_real_value = task->signal->it_real_value; + it_real_value = task->signal->real_timer.expires; } ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); @@ -435,7 +435,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) priority, nice, num_threads, - jiffies_to_clock_t(it_real_value), + (long) ktime_to_clock_t(it_real_value), start_time, vsize, mm ? get_mm_rss(mm) : 0, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 72b431d0a0a4..20e5c4509a43 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -21,6 +21,8 @@ #include <linux/bitops.h> #include <asm/uaccess.h> +#include "internal.h" + static ssize_t proc_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); static ssize_t proc_file_write(struct file *file, const char __user *buffer, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index e6a818a93f3d..6573f31f1fd9 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -19,7 +19,7 @@ #include <asm/system.h> #include <asm/uaccess.h> -extern void free_proc_entry(struct proc_dir_entry *); +#include "internal.h" static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3e55198f9806..95a1cf32b838 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +void free_proc_entry(struct proc_dir_entry *de); + +int proc_init_inodecache(void); + static inline struct task_struct *proc_task(struct inode *inode) { return PROC_I(inode)->task; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5b6b0b6038a7..63bf6c00fa0c 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -323,6 +323,7 @@ static struct file_operations proc_modules_operations = { }; #endif +#ifdef CONFIG_SLAB extern struct seq_operations slabinfo_op; extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); static int slabinfo_open(struct inode *inode, struct file *file) @@ -336,6 +337,7 @@ static struct file_operations proc_slabinfo_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif static int show_stat(struct seq_file *p, void *v) { @@ -600,7 +602,9 @@ void __init proc_misc_init(void) create_seq_entry("partitions", 0, &proc_partitions_operations); create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); +#ifdef CONFIG_SLAB create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +#endif create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); diff --git a/fs/proc/root.c b/fs/proc/root.c index aef148f099a2..68896283c8ae 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,8 @@ #include <linux/bitops.h> #include <linux/smp_lock.h> +#include "internal.h" + struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; #ifdef CONFIG_SYSCTL @@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = { .kill_sb = kill_anon_super, }; -extern int __init proc_init_inodecache(void); void __init proc_root_init(void) { int err = proc_init_inodecache(); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 50bd5a8f0446..0eaad41f4658 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = { }; #ifdef CONFIG_NUMA - -struct numa_maps { - unsigned long pages; - unsigned long anon; - unsigned long mapped; - unsigned long mapcount_max; - unsigned long node[MAX_NUMNODES]; -}; - -/* - * Calculate numa node maps for a vma - */ -static struct numa_maps *get_numa_maps(struct vm_area_struct *vma) -{ - int i; - struct page *page; - unsigned long vaddr; - struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); - - if (!md) - return NULL; - md->pages = 0; - md->anon = 0; - md->mapped = 0; - md->mapcount_max = 0; - for_each_node(i) - md->node[i] =0; - - for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { - page = follow_page(vma, vaddr, 0); - if (page) { - int count = page_mapcount(page); - - if (count) - md->mapped++; - if (count > md->mapcount_max) - md->mapcount_max = count; - md->pages++; - if (PageAnon(page)) - md->anon++; - md->node[page_to_nid(page)]++; - } - cond_resched(); - } - return md; -} - -static int show_numa_map(struct seq_file *m, void *v) -{ - struct task_struct *task = m->private; - struct vm_area_struct *vma = v; - struct mempolicy *pol; - struct numa_maps *md; - struct zone **z; - int n; - int first; - - if (!vma->vm_mm) - return 0; - - md = get_numa_maps(vma); - if (!md) - return 0; - - seq_printf(m, "%08lx", vma->vm_start); - pol = get_vma_policy(task, vma, vma->vm_start); - /* Print policy */ - switch (pol->policy) { - case MPOL_PREFERRED: - seq_printf(m, " prefer=%d", pol->v.preferred_node); - break; - case MPOL_BIND: - seq_printf(m, " bind={"); - first = 1; - for (z = pol->v.zonelist->zones; *z; z++) { - - if (!first) - seq_putc(m, ','); - else - first = 0; - seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, - (*z)->name); - } - seq_putc(m, '}'); - break; - case MPOL_INTERLEAVE: - seq_printf(m, " interleave={"); - first = 1; - for_each_node(n) { - if (node_isset(n, pol->v.nodes)) { - if (!first) - seq_putc(m,','); - else - first = 0; - seq_printf(m, "%d",n); - } - } - seq_putc(m, '}'); - break; - default: - seq_printf(m," default"); - break; - } - seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", - md->mapcount_max, md->pages, md->mapped); - if (md->anon) - seq_printf(m," Anon=%lu",md->anon); - - for_each_online_node(n) { - if (md->node[n]) - seq_printf(m, " N%d=%lu", n, md->node[n]); - } - seq_putc(m, '\n'); - kfree(md); - if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; - return 0; -} +extern int show_numa_map(struct seq_file *m, void *v); struct seq_operations proc_pid_numa_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_numa_map + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_numa_map }; #endif diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 3b2e7b69e63a..5378d7c78419 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -35,6 +35,9 @@ static size_t elfcorebuf_sz; /* Total size of vmcore file. */ static u64 vmcore_size; +/* Stores the physical address of elf header of crash image. */ +unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; + struct proc_dir_entry *proc_vmcore = NULL; /* Reads a page from the oldmem device from given offset. */ diff --git a/fs/quota.c b/fs/quota.c index 612e04db4b93..d14d872646d4 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -168,7 +168,7 @@ static void quota_sync_sb(struct super_block *sb, int type) sync_blockdev(sb->s_bdev); /* Now when everything is written we can discard the pagecache so - * that userspace sees the changes. We need i_sem and so we could + * that userspace sees the changes. We need i_mutex and so we could * not do it inside dqonoff_sem. Moreover we need to be carefull * about races with quotaoff() (that is the reason why we have own * reference to inode). */ @@ -184,9 +184,9 @@ static void quota_sync_sb(struct super_block *sb, int type) up(&sb_dqopt(sb)->dqonoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (discard[cnt]) { - down(&discard[cnt]->i_sem); + mutex_lock(&discard[cnt]->i_mutex); truncate_inode_pages(&discard[cnt]->i_data, 0); - up(&discard[cnt]->i_sem); + mutex_unlock(&discard[cnt]->i_mutex); iput(discard[cnt]); } } diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile index f096f3007091..5a0236e02ee1 100644 --- a/fs/ramfs/Makefile +++ b/fs/ramfs/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_RAMFS) += ramfs.o -ramfs-objs := inode.o +file-mmu-y := file-nommu.o +file-mmu-$(CONFIG_MMU) := file-mmu.o +ramfs-objs += inode.o $(file-mmu-y) diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c new file mode 100644 index 000000000000..2115383dcc8d --- /dev/null +++ b/fs/ramfs/file-mmu.c @@ -0,0 +1,57 @@ +/* file-mmu.c: ramfs MMU-based file operations + * + * Resizable simple ram filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * + * Usage limits added by David Gibson, Linuxcare Australia. + * This file is released under the GPL. + */ + +/* + * NOTE! This filesystem is probably most useful + * not as a real filesystem, but as an example of + * how virtual filesystems can be written. + * + * It doesn't get much simpler than this. Consider + * that this file implements the full semantics of + * a POSIX-compliant read-write filesystem. + * + * Note in particular how the filesystem does not + * need to implement any data structures of its own + * to keep track of the virtual data: using the VFS + * caches is sufficient. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/backing-dev.h> +#include <linux/ramfs.h> + +#include <asm/uaccess.h> +#include "internal.h" + +struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +struct file_operations ramfs_file_operations = { + .read = generic_file_read, + .write = generic_file_write, + .mmap = generic_file_mmap, + .fsync = simple_sync_file, + .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, +}; + +struct inode_operations ramfs_file_inode_operations = { + .getattr = simple_getattr, +}; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c new file mode 100644 index 000000000000..3f810acd0bfa --- /dev/null +++ b/fs/ramfs/file-nommu.c @@ -0,0 +1,292 @@ +/* file-nommu.c: no-MMU version of ramfs + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/backing-dev.h> +#include <linux/ramfs.h> +#include <linux/quotaops.h> +#include <linux/pagevec.h> +#include <linux/mman.h> + +#include <asm/uaccess.h> +#include "internal.h" + +static int ramfs_nommu_setattr(struct dentry *, struct iattr *); + +struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +struct file_operations ramfs_file_operations = { + .mmap = ramfs_nommu_mmap, + .get_unmapped_area = ramfs_nommu_get_unmapped_area, + .read = generic_file_read, + .write = generic_file_write, + .fsync = simple_sync_file, + .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, +}; + +struct inode_operations ramfs_file_inode_operations = { + .setattr = ramfs_nommu_setattr, + .getattr = simple_getattr, +}; + +/*****************************************************************************/ +/* + * add a contiguous set of pages into a ramfs inode when it's truncated from + * size 0 on the assumption that it's going to be used for an mmap of shared + * memory + */ +static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) +{ + struct pagevec lru_pvec; + unsigned long npages, xpages, loop, limit; + struct page *pages; + unsigned order; + void *data; + int ret; + + /* make various checks */ + order = get_order(newsize); + if (unlikely(order >= MAX_ORDER)) + goto too_big; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && newsize > limit) + goto fsize_exceeded; + + if (newsize > inode->i_sb->s_maxbytes) + goto too_big; + + i_size_write(inode, newsize); + + /* allocate enough contiguous pages to be able to satisfy the + * request */ + pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order); + if (!pages) + return -ENOMEM; + + /* split the high-order page into an array of single pages */ + xpages = 1UL << order; + npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + + for (loop = 0; loop < npages; loop++) + set_page_count(pages + loop, 1); + + /* trim off any pages we don't actually require */ + for (loop = npages; loop < xpages; loop++) + __free_page(pages + loop); + + /* clear the memory we allocated */ + newsize = PAGE_SIZE * npages; + data = page_address(pages); + memset(data, 0, newsize); + + /* attach all the pages to the inode's address space */ + pagevec_init(&lru_pvec, 0); + for (loop = 0; loop < npages; loop++) { + struct page *page = pages + loop; + + ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); + if (ret < 0) + goto add_error; + + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add(&lru_pvec); + + unlock_page(page); + } + + pagevec_lru_add(&lru_pvec); + return 0; + + fsize_exceeded: + send_sig(SIGXFSZ, current, 0); + too_big: + return -EFBIG; + + add_error: + page_cache_release(pages + loop); + for (loop++; loop < npages; loop++) + __free_page(pages + loop); + return ret; +} + +/*****************************************************************************/ +/* + * check that file shrinkage doesn't leave any VMAs dangling in midair + */ +static int ramfs_nommu_check_mappings(struct inode *inode, + size_t newsize, size_t size) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + + /* search for VMAs that fall within the dead zone */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + newsize >> PAGE_SHIFT, + (size + PAGE_SIZE - 1) >> PAGE_SHIFT + ) { + /* found one - only interested if it's shared out of the page + * cache */ + if (vma->vm_flags & VM_SHARED) + return -ETXTBSY; /* not quite true, but near enough */ + } + + return 0; +} + +/*****************************************************************************/ +/* + * + */ +static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) +{ + int ret; + + /* assume a truncate from zero size is going to be for the purposes of + * shared mmap */ + if (size == 0) { + if (unlikely(newsize >> 32)) + return -EFBIG; + + return ramfs_nommu_expand_for_mapping(inode, newsize); + } + + /* check that a decrease in size doesn't cut off any shared mappings */ + if (newsize < size) { + ret = ramfs_nommu_check_mappings(inode, newsize, size); + if (ret < 0) + return ret; + } + + ret = vmtruncate(inode, size); + + return ret; +} + +/*****************************************************************************/ +/* + * handle a change of attributes + * - we're specifically interested in a change of size + */ +static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = dentry->d_inode; + unsigned int old_ia_valid = ia->ia_valid; + int ret = 0; + + /* by providing our own setattr() method, we skip this quotaism */ + if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || + (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid)) + ret = DQUOT_TRANSFER(inode, ia) ? -EDQUOT : 0; + + /* pick out size-changing events */ + if (ia->ia_valid & ATTR_SIZE) { + loff_t size = i_size_read(inode); + if (ia->ia_size != size) { + ret = ramfs_nommu_resize(inode, ia->ia_size, size); + if (ret < 0 || ia->ia_valid == ATTR_SIZE) + goto out; + } else { + /* we skipped the truncate but must still update + * timestamps + */ + ia->ia_valid |= ATTR_MTIME|ATTR_CTIME; + } + } + + ret = inode_setattr(inode, ia); + out: + ia->ia_valid = old_ia_valid; + return ret; +} + +/*****************************************************************************/ +/* + * try to determine where a shared mapping can be made + * - we require that: + * - the pages to be mapped must exist + * - the pages be physically contiguous in sequence + */ +unsigned long ramfs_nommu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + unsigned long maxpages, lpages, nr, loop, ret; + struct inode *inode = file->f_dentry->d_inode; + struct page **pages = NULL, **ptr, *page; + loff_t isize; + + if (!(flags & MAP_SHARED)) + return addr; + + /* the mapping mustn't extend beyond the EOF */ + lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + isize = i_size_read(inode); + + ret = -EINVAL; + maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (pgoff >= maxpages) + goto out; + + if (maxpages - pgoff < lpages) + goto out; + + /* gang-find the pages */ + ret = -ENOMEM; + pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out; + + nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages); + if (nr != lpages) + goto out; /* leave if some pages were missing */ + + /* check the pages for physical adjacency */ + ptr = pages; + page = *ptr++; + page++; + for (loop = lpages; loop > 1; loop--) + if (*ptr++ != page++) + goto out; + + /* okay - all conditions fulfilled */ + ret = (unsigned long) page_address(pages[0]); + + out: + if (pages) { + ptr = pages; + for (loop = lpages; loop > 0; loop--) + put_page(*ptr++); + kfree(pages); + } + + return ret; +} + +/*****************************************************************************/ +/* + * set up a mapping + */ +int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) +{ + return 0; +} diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 0a88917605ae..c66bd5e4c05c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -34,13 +34,12 @@ #include <linux/ramfs.h> #include <asm/uaccess.h> +#include "internal.h" /* some random number */ #define RAMFS_MAGIC 0x858458f6 static struct super_operations ramfs_ops; -static struct address_space_operations ramfs_aops; -static struct inode_operations ramfs_file_inode_operations; static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { @@ -142,25 +141,6 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * return error; } -static struct address_space_operations ramfs_aops = { - .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write -}; - -struct file_operations ramfs_file_operations = { - .read = generic_file_read, - .write = generic_file_write, - .mmap = generic_file_mmap, - .fsync = simple_sync_file, - .sendfile = generic_file_sendfile, - .llseek = generic_file_llseek, -}; - -static struct inode_operations ramfs_file_inode_operations = { - .getattr = simple_getattr, -}; - static struct inode_operations ramfs_dir_inode_operations = { .create = ramfs_create, .lookup = simple_lookup, diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h new file mode 100644 index 000000000000..272c8a7120b0 --- /dev/null +++ b/fs/ramfs/internal.h @@ -0,0 +1,15 @@ +/* internal.h: ramfs internal definitions + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +extern struct address_space_operations ramfs_aops; +extern struct file_operations ramfs_file_operations; +extern struct inode_operations ramfs_file_inode_operations; diff --git a/fs/read_write.c b/fs/read_write.c index df3468a22fea..3f7a1a62165f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -33,7 +33,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) long long retval; struct inode *inode = file->f_mapping->host; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); switch (origin) { case 2: offset += inode->i_size; @@ -49,7 +49,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) } retval = offset; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return retval; } diff --git a/fs/readdir.c b/fs/readdir.c index b03579bc0210..b6109329b607 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -30,13 +30,13 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) if (res) goto out; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); res = -ENOENT; if (!IS_DEADDIR(inode)) { res = file->f_op->readdir(file, buf, filler); file_accessed(file); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out: return res; } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 7892a865b58a..ad6fa964b0e7 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -49,7 +49,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) } reiserfs_write_lock(inode->i_sb); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); /* freeing preallocation only involves relogging blocks that * are already in the current transaction. preallocation gets * freed at the end of each transaction, so it is impossible for @@ -100,7 +100,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) err = reiserfs_truncate_file(inode, 0); } out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reiserfs_write_unlock(inode->i_sb); return err; } @@ -1342,7 +1342,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; - down(&inode->i_sem); // locks the entire file for just us + mutex_lock(&inode->i_mutex); // locks the entire file for just us pos = *ppos; @@ -1360,7 +1360,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t if (res) goto out; - inode_update_time(inode, 1); /* Both mtime and ctime */ + file_update_time(file); // Ok, we are done with all the checks. @@ -1532,12 +1532,12 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA | OSYNC_DATA); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reiserfs_async_progress_wait(inode->i_sb); return (already_written != 0) ? already_written : res; out: - up(&inode->i_sem); // unlock the file on exit. + mutex_unlock(&inode->i_mutex); // unlock the file on exit. return res; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a5e3a0ddbe53..ffa34b861bdb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -40,12 +40,12 @@ void reiserfs_delete_inode(struct inode *inode) /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); reiserfs_delete_xattrs(inode); if (journal_begin(&th, inode->i_sb, jbegin_count)) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); goto out; } reiserfs_update_inode_transaction(inode); @@ -59,11 +59,11 @@ void reiserfs_delete_inode(struct inode *inode) DQUOT_FREE_INODE(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); goto out; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); /* check return value from reiserfs_delete_object after * ending the transaction @@ -551,7 +551,7 @@ static int convert_tail_for_hole(struct inode *inode, /* we don't have to make sure the conversion did not happen while ** we were locking the page because anyone that could convert - ** must first take i_sem. + ** must first take i_mutex. ** ** We must fix the tail page for writing because it might have buffers ** that are mapped, but have a block number of 0. This indicates tail @@ -586,7 +586,7 @@ static inline int _allocate_block(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); #ifdef REISERFS_PREALLOCATE - if (!(flags & GET_BLOCK_NO_ISEM)) { + if (!(flags & GET_BLOCK_NO_IMUX)) { return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); } @@ -2318,7 +2318,7 @@ static int map_block_for_writepage(struct inode *inode, /* this is where we fill in holes in the file. */ if (use_get_block) { retval = reiserfs_get_block(inode, block, bh_result, - GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM + GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX | GET_BLOCK_NO_DANGLE); if (!retval) { if (!buffer_mapped(bh_result) diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 81fc00285f60..ba8bf8df6dc7 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -120,7 +120,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) /* we need to make sure nobody is changing the file size beneath ** us */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); write_from = inode->i_size & (blocksize - 1); /* if we are on a block boundary, we are already unpacked. */ @@ -156,7 +156,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) page_cache_release(page); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reiserfs_write_unlock(inode->i_sb); return retval; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 42afb5bef111..397d9590c8f2 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2211,7 +2211,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head tmp_bh, *bh; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -2250,7 +2250,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index c92e124f628e..196e971c03c9 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -205,7 +205,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in 1) * p_s_sb->s_blocksize; pos1 = pos; - // we are protected by i_sem. The tail can not disapper, not + // we are protected by i_mutex. The tail can not disapper, not // append can be done either // we are in truncate or packing tail in file_release diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 02091eaac0b4..6f99e01f94ab 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -67,11 +67,11 @@ static struct dentry *create_xa_root(struct super_block *sb) goto out; } else if (!xaroot->d_inode) { int err; - down(&privroot->d_inode->i_sem); + mutex_lock(&privroot->d_inode->i_mutex); err = privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, 0700); - up(&privroot->d_inode->i_sem); + mutex_unlock(&privroot->d_inode->i_mutex); if (err) { dput(xaroot); @@ -219,7 +219,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { goto out; } else { - /* inode->i_sem is down, so nothing else can try to create + /* inode->i_mutex is down, so nothing else can try to create * the same xattr */ err = xadir->d_inode->i_op->create(xadir->d_inode, xafile, 0700 | S_IFREG, NULL); @@ -268,7 +268,7 @@ static struct file *open_xa_file(const struct inode *inode, const char *name, * and don't mess with f->f_pos, but the idea is the same. Do some * action on each and every entry in the directory. * - * we're called with i_sem held, so there are no worries about the directory + * we're called with i_mutex held, so there are no worries about the directory * changing underneath us. */ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir) @@ -426,7 +426,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) int res = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) goto out; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); // down(&inode->i_zombie); res = -ENOENT; if (!IS_DEADDIR(inode)) { @@ -435,7 +435,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) unlock_kernel(); } // up(&inode->i_zombie); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out: return res; } @@ -480,7 +480,7 @@ static inline __u32 xattr_hash(const char *msg, int len) /* Generic extended attribute operations that can be used by xa plugins */ /* - * inode->i_sem: down + * inode->i_mutex: down */ int reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, @@ -497,12 +497,6 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, struct iattr newattrs; __u32 xahash = 0; - if (IS_RDONLY(inode)) - return -EROFS; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - if (get_inode_sd_version(inode) == STAT_DATA_V1) return -EOPNOTSUPP; @@ -535,7 +529,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, /* Resize it so we're ok to write there */ newattrs.ia_size = buffer_size; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - down(&xinode->i_sem); + mutex_lock(&xinode->i_mutex); err = notify_change(fp->f_dentry, &newattrs); if (err) goto out_filp; @@ -598,7 +592,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, } out_filp: - up(&xinode->i_sem); + mutex_unlock(&xinode->i_mutex); fput(fp); out: @@ -606,7 +600,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, } /* - * inode->i_sem: down + * inode->i_mutex: down */ int reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, @@ -758,9 +752,6 @@ int reiserfs_xattr_del(struct inode *inode, const char *name) struct dentry *dir; int err; - if (IS_RDONLY(inode)) - return -EROFS; - dir = open_xa_dir(inode, FL_READONLY); if (IS_ERR(dir)) { err = PTR_ERR(dir); @@ -793,7 +784,7 @@ reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, } -/* This is called w/ inode->i_sem downed */ +/* This is called w/ inode->i_mutex downed */ int reiserfs_delete_xattrs(struct inode *inode) { struct file *fp; @@ -946,7 +937,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) /* * Inode operation getxattr() - * Preliminary locking: we down dentry->d_inode->i_sem + * Preliminary locking: we down dentry->d_inode->i_mutex */ ssize_t reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, @@ -970,7 +961,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_mutex down */ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, @@ -984,12 +975,6 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; - if (IS_RDONLY(dentry->d_inode)) - return -EROFS; - - if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) - return -EROFS; - reiserfs_write_lock_xattr_i(dentry->d_inode); lock = !has_xattr_dir(dentry->d_inode); if (lock) @@ -1008,7 +993,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_mutex down */ int reiserfs_removexattr(struct dentry *dentry, const char *name) { @@ -1019,12 +1004,6 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name) get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; - if (IS_RDONLY(dentry->d_inode)) - return -EROFS; - - if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) - return -EPERM; - reiserfs_write_lock_xattr_i(dentry->d_inode); reiserfs_read_lock_xattrs(dentry->d_sb); @@ -1091,7 +1070,7 @@ reiserfs_listxattr_filler(void *buf, const char *name, int namelen, /* * Inode operation listxattr() * - * Preliminary locking: we down dentry->d_inode->i_sem + * Preliminary locking: we down dentry->d_inode->i_mutex */ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) { @@ -1289,9 +1268,9 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) if (!IS_ERR(dentry)) { if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { struct inode *inode = dentry->d_parent->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); err = inode->i_op->mkdir(inode, dentry, 0700); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (err) { dput(dentry); dentry = NULL; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index a47ac9aac8b2..2dc953504cc0 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -174,7 +174,7 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) /* * Inode operation get_posix_acl(). * - * inode->i_sem: down + * inode->i_mutex: down * BKL held [before 2.5.x] */ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) @@ -237,7 +237,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) /* * Inode operation set_posix_acl(). * - * inode->i_sem: down + * inode->i_mutex: down * BKL held [before 2.5.x] */ static int @@ -312,7 +312,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; } -/* dir->i_sem: down, +/* dir->i_mutex: locked, * inode is new and not released into the wild yet */ int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 51458048ca66..073f39364b11 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -16,18 +16,10 @@ static int user_get(struct inode *inode, const char *name, void *buffer, size_t size) { - int error; - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; - if (!reiserfs_xattrs_user(inode->i_sb)) return -EOPNOTSUPP; - - error = reiserfs_permission_locked(inode, MAY_READ, NULL); - if (error) - return error; - return reiserfs_xattr_get(inode, name, buffer, size); } @@ -36,43 +28,21 @@ user_set(struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { - int error; - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; if (!reiserfs_xattrs_user(inode->i_sb)) return -EOPNOTSUPP; - - if (!S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; - - error = reiserfs_permission_locked(inode, MAY_WRITE, NULL); - if (error) - return error; - return reiserfs_xattr_set(inode, name, buffer, size, flags); } static int user_del(struct inode *inode, const char *name) { - int error; - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; if (!reiserfs_xattrs_user(inode->i_sb)) return -EOPNOTSUPP; - - if (!S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; - - error = reiserfs_permission_locked(inode, MAY_WRITE, NULL); - if (error) - return error; - return 0; } diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c index 84e21ffa5ca8..10187812771e 100644 --- a/fs/relayfs/buffers.c +++ b/fs/relayfs/buffers.c @@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf) void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); - relayfs_remove(buf->dentry); + buf->chan->cb->remove_buf_file(buf->dentry); + relay_destroy_buf(buf); } diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index 0f7f88d067ad..383523011aad 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -26,31 +26,22 @@ static struct vfsmount * relayfs_mount; static int relayfs_mount_count; -static kmem_cache_t * relayfs_inode_cachep; static struct backing_dev_info relayfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, }; -static struct inode *relayfs_get_inode(struct super_block *sb, int mode, - struct rchan *chan) +static struct inode *relayfs_get_inode(struct super_block *sb, + int mode, + struct file_operations *fops, + void *data) { - struct rchan_buf *buf = NULL; struct inode *inode; - if (S_ISREG(mode)) { - BUG_ON(!chan); - buf = relay_create_buf(chan); - if (!buf) - return NULL; - } - inode = new_inode(sb); - if (!inode) { - relay_destroy_buf(buf); + if (!inode) return NULL; - } inode->i_mode = mode; inode->i_uid = 0; @@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { case S_IFREG: - inode->i_fop = &relayfs_file_operations; - RELAYFS_I(inode)->buf = buf; + inode->i_fop = fops; + if (data) + inode->u.generic_ip = data; break; case S_IFDIR: inode->i_op = &simple_dir_inode_operations; @@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, * @name: the name of the file to create * @parent: parent directory * @mode: mode - * @chan: relay channel associated with the file + * @fops: file operations to use for the file + * @data: user-associated data for this file * * Returns the new dentry, NULL on failure * @@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, static struct dentry *relayfs_create_entry(const char *name, struct dentry *parent, int mode, - struct rchan *chan) + struct file_operations *fops, + void *data) { struct dentry *d; struct inode *inode; @@ -115,7 +109,7 @@ static struct dentry *relayfs_create_entry(const char *name, } parent = dget(parent); - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); d = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(d)) { d = NULL; @@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name, goto release_mount; } - inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); + inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data); if (!inode) { d = NULL; goto release_mount; @@ -145,7 +139,7 @@ release_mount: simple_release_fs(&relayfs_mount, &relayfs_mount_count); exit: - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); dput(parent); return d; } @@ -155,20 +149,26 @@ exit: * @name: the name of the file to create * @parent: parent directory * @mode: mode, if not specied the default perms are used - * @chan: channel associated with the file + * @fops: file operations to use for the file + * @data: user-associated data for this file * * Returns file dentry if successful, NULL otherwise. * * The file will be created user r on behalf of current user. */ -struct dentry *relayfs_create_file(const char *name, struct dentry *parent, - int mode, struct rchan *chan) +struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct file_operations *fops, + void *data) { + BUG_ON(!fops); + if (!mode) mode = S_IRUSR; mode = (mode & S_IALLUGO) | S_IFREG; - return relayfs_create_entry(name, parent, mode, chan); + return relayfs_create_entry(name, parent, mode, fops, data); } /** @@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent, struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) { int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - return relayfs_create_entry(name, parent, mode, NULL); + return relayfs_create_entry(name, parent, mode, NULL, NULL); } /** @@ -204,7 +204,7 @@ int relayfs_remove(struct dentry *dentry) return -EINVAL; parent = dget(parent); - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) error = simple_rmdir(parent->d_inode, dentry); @@ -215,7 +215,7 @@ int relayfs_remove(struct dentry *dentry) } if (!error) dput(dentry); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); dput(parent); if (!error) @@ -225,6 +225,17 @@ int relayfs_remove(struct dentry *dentry) } /** + * relayfs_remove_file - remove a file from relay filesystem + * @dentry: directory dentry + * + * Returns 0 if successful, negative otherwise. + */ +int relayfs_remove_file(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + +/** * relayfs_remove_dir - remove a directory in the relay filesystem * @dentry: directory dentry * @@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry) } /** - * relayfs_open - open file op for relayfs files + * relay_file_open - open file op for relay files * @inode: the inode * @filp: the file * * Increments the channel buffer refcount. */ -static int relayfs_open(struct inode *inode, struct file *filp) +static int relay_file_open(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = inode->u.generic_ip; kref_get(&buf->kref); + filp->private_data = buf; return 0; } /** - * relayfs_mmap - mmap file op for relayfs files + * relay_file_mmap - mmap file op for relay files * @filp: the file * @vma: the vma describing what to map * * Calls upon relay_mmap_buf to map the file into user space. */ -static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) { - struct inode *inode = filp->f_dentry->d_inode; - return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); + struct rchan_buf *buf = filp->private_data; + return relay_mmap_buf(buf, vma); } /** - * relayfs_poll - poll file op for relayfs files + * relay_file_poll - poll file op for relay files * @filp: the file * @wait: poll table * * Poll implemention. */ -static unsigned int relayfs_poll(struct file *filp, poll_table *wait) +static unsigned int relay_file_poll(struct file *filp, poll_table *wait) { unsigned int mask = 0; - struct inode *inode = filp->f_dentry->d_inode; - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = filp->private_data; if (buf->finalized) return POLLERR; @@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait) } /** - * relayfs_release - release file op for relayfs files + * relay_file_release - release file op for relay files * @inode: the inode * @filp: the file * * Decrements the channel refcount, as the filesystem is * no longer using it. */ -static int relayfs_release(struct inode *inode, struct file *filp) +static int relay_file_release(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = filp->private_data; kref_put(&buf->kref, relay_remove_buf); return 0; } /** - * relayfs_read_consume - update the consumed count for the buffer + * relay_file_read_consume - update the consumed count for the buffer */ -static void relayfs_read_consume(struct rchan_buf *buf, - size_t read_pos, - size_t bytes_consumed) +static void relay_file_read_consume(struct rchan_buf *buf, + size_t read_pos, + size_t bytes_consumed) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; @@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf, } /** - * relayfs_read_avail - boolean, are there unconsumed bytes available? + * relay_file_read_avail - boolean, are there unconsumed bytes available? */ -static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) +static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) { size_t bytes_produced, bytes_consumed, write_offset; size_t subbuf_size = buf->chan->subbuf_size; @@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) if (bytes_produced == bytes_consumed) return 0; - relayfs_read_consume(buf, read_pos, 0); + relay_file_read_consume(buf, read_pos, 0); return 1; } /** - * relayfs_read_subbuf_avail - return bytes available in sub-buffer + * relay_file_read_subbuf_avail - return bytes available in sub-buffer */ -static size_t relayfs_read_subbuf_avail(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_subbuf_avail(size_t read_pos, + struct rchan_buf *buf) { size_t padding, avail = 0; size_t read_subbuf, read_offset, write_subbuf, write_offset; @@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos, } /** - * relayfs_read_start_pos - find the first available byte to read + * relay_file_read_start_pos - find the first available byte to read * * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. */ -static size_t relayfs_read_start_pos(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_start_pos(size_t read_pos, + struct rchan_buf *buf) { size_t read_subbuf, padding, padding_start, padding_end; size_t subbuf_size = buf->chan->subbuf_size; @@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos, } /** - * relayfs_read_end_pos - return the new read position + * relay_file_read_end_pos - return the new read position */ -static size_t relayfs_read_end_pos(struct rchan_buf *buf, - size_t read_pos, - size_t count) +static size_t relay_file_read_end_pos(struct rchan_buf *buf, + size_t read_pos, + size_t count) { size_t read_subbuf, padding, end_pos; size_t subbuf_size = buf->chan->subbuf_size; @@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf, } /** - * relayfs_read - read file op for relayfs files + * relay_file_read - read file op for relay files * @filp: the file * @buffer: the userspace buffer * @count: number of bytes to read @@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf, * Reads count bytes or the number of bytes available in the * current sub-buffer being read, whichever is smaller. */ -static ssize_t relayfs_read(struct file *filp, - char __user *buffer, - size_t count, - loff_t *ppos) +static ssize_t relay_file_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) { + struct rchan_buf *buf = filp->private_data; struct inode *inode = filp->f_dentry->d_inode; - struct rchan_buf *buf = RELAYFS_I(inode)->buf; size_t read_start, avail; ssize_t ret = 0; void *from; - down(&inode->i_sem); - if(!relayfs_read_avail(buf, *ppos)) + mutex_lock(&inode->i_mutex); + if(!relay_file_read_avail(buf, *ppos)) goto out; - read_start = relayfs_read_start_pos(*ppos, buf); - avail = relayfs_read_subbuf_avail(read_start, buf); + read_start = relay_file_read_start_pos(*ppos, buf); + avail = relay_file_read_subbuf_avail(read_start, buf); if (!avail) goto out; @@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp, ret = -EFAULT; goto out; } - relayfs_read_consume(buf, read_start, count); - *ppos = relayfs_read_end_pos(buf, read_start, count); + relay_file_read_consume(buf, read_start, count); + *ppos = relay_file_read_end_pos(buf, read_start, count); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } -/** - * relayfs alloc_inode() implementation - */ -static struct inode *relayfs_alloc_inode(struct super_block *sb) -{ - struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); - if (!p) - return NULL; - p->buf = NULL; - - return &p->vfs_inode; -} - -/** - * relayfs destroy_inode() implementation - */ -static void relayfs_destroy_inode(struct inode *inode) -{ - if (RELAYFS_I(inode)->buf) - relay_destroy_buf(RELAYFS_I(inode)->buf); - - kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode)); -} - -static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) -{ - struct relayfs_inode_info *i = p; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); -} - -struct file_operations relayfs_file_operations = { - .open = relayfs_open, - .poll = relayfs_poll, - .mmap = relayfs_mmap, - .read = relayfs_read, +struct file_operations relay_file_operations = { + .open = relay_file_open, + .poll = relay_file_poll, + .mmap = relay_file_mmap, + .read = relay_file_read, .llseek = no_llseek, - .release = relayfs_release, + .release = relay_file_release, }; static struct super_operations relayfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, - .alloc_inode = relayfs_alloc_inode, - .destroy_inode = relayfs_destroy_inode, }; static int relayfs_fill_super(struct super_block * sb, void * data, int silent) @@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RELAYFS_MAGIC; sb->s_op = &relayfs_ops; - inode = relayfs_get_inode(sb, mode, NULL); + inode = relayfs_get_inode(sb, mode, NULL, NULL); if (!inode) return -ENOMEM; @@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = { static int __init init_relayfs_fs(void) { - int err; - - relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache", - sizeof(struct relayfs_inode_info), 0, - 0, init_once, NULL); - if (!relayfs_inode_cachep) - return -ENOMEM; - - err = register_filesystem(&relayfs_fs_type); - if (err) - kmem_cache_destroy(relayfs_inode_cachep); - - return err; + return register_filesystem(&relayfs_fs_type); } static void __exit exit_relayfs_fs(void) { + + + + + unregister_filesystem(&relayfs_fs_type); - kmem_cache_destroy(relayfs_inode_cachep); } module_init(init_relayfs_fs) module_exit(exit_relayfs_fs) -EXPORT_SYMBOL_GPL(relayfs_file_operations); +EXPORT_SYMBOL_GPL(relay_file_operations); EXPORT_SYMBOL_GPL(relayfs_create_dir); EXPORT_SYMBOL_GPL(relayfs_remove_dir); +EXPORT_SYMBOL_GPL(relayfs_create_file); +EXPORT_SYMBOL_GPL(relayfs_remove_file); MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>"); MODULE_DESCRIPTION("Relay Filesystem"); diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c index 2a6f7f12b7f9..abf3ceaace49 100644 --- a/fs/relayfs/relay.c +++ b/fs/relayfs/relay.c @@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf, { } +/* + * create_buf_file_create() default callback. Creates file to represent buf. + */ +static struct dentry *create_buf_file_default_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return relayfs_create_file(filename, parent, mode, + &relay_file_operations, buf); +} + +/* + * remove_buf_file() default callback. Removes file representing relay buffer. + */ +static int remove_buf_file_default_callback(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + /* relay channel default callbacks */ static struct rchan_callbacks default_channel_callbacks = { .subbuf_start = subbuf_start_default_callback, .buf_mapped = buf_mapped_default_callback, .buf_unmapped = buf_unmapped_default_callback, + .create_buf_file = create_buf_file_default_callback, + .remove_buf_file = remove_buf_file_default_callback, }; /** @@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) void relay_reset(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; __relay_reset(chan->buf[i], 0); + prev = chan->buf[i]; } } @@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan) */ static struct rchan_buf *relay_open_buf(struct rchan *chan, const char *filename, - struct dentry *parent) + struct dentry *parent, + int *is_global) { struct rchan_buf *buf; struct dentry *dentry; + if (*is_global) + return chan->buf[0]; + + buf = relay_create_buf(chan); + if (!buf) + return NULL; + /* Create file in fs */ - dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); - if (!dentry) + dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, + buf, is_global); + if (!dentry) { + relay_destroy_buf(buf); return NULL; + } - buf = RELAYFS_I(dentry->d_inode)->buf; buf->dentry = dentry; __relay_reset(buf, 1); @@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan, cb->buf_mapped = buf_mapped_default_callback; if (!cb->buf_unmapped) cb->buf_unmapped = buf_unmapped_default_callback; + if (!cb->create_buf_file) + cb->create_buf_file = create_buf_file_default_callback; + if (!cb->remove_buf_file) + cb->remove_buf_file = remove_buf_file_default_callback; chan->cb = cb; } @@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename, unsigned int i; struct rchan *chan; char *tmpname; + int is_global = 0; if (!base_filename) return NULL; @@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename, for_each_online_cpu(i) { sprintf(tmpname, "%s%d", base_filename, i); - chan->buf[i] = relay_open_buf(chan, tmpname, parent); + chan->buf[i] = relay_open_buf(chan, tmpname, parent, + &is_global); chan->buf[i]->cpu = i; if (!chan->buf[i]) goto free_bufs; @@ -279,6 +320,8 @@ free_bufs: if (!chan->buf[i]) break; relay_close_buf(chan->buf[i]); + if (is_global) + break; } kfree(tmpname); @@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref) void relay_close(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; relay_close_buf(chan->buf[i]); + prev = chan->buf[i]; } if (chan->last_toobig) @@ -415,14 +460,16 @@ void relay_close(struct rchan *chan) void relay_flush(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; relay_switch_subbuf(chan->buf[i], 0); + prev = chan->buf[i]; } } diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h index 703503fa22b6..0993d3e5753b 100644 --- a/fs/relayfs/relay.h +++ b/fs/relayfs/relay.h @@ -1,10 +1,6 @@ #ifndef _RELAY_H #define _RELAY_H -struct dentry *relayfs_create_file(const char *name, - struct dentry *parent, - int mode, - struct rchan *chan); extern int relayfs_remove(struct dentry *dentry); extern int relay_buf_empty(struct rchan_buf *buf); extern void relay_destroy_channel(struct kref *kref); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index c74f382dabba..0a13859fd57b 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -418,7 +418,7 @@ static int romfs_readpage(struct file *file, struct page * page) { struct inode *inode = page->mapping->host; - unsigned long offset, avail, readlen; + loff_t offset, avail, readlen; void *buf; int result = -EIO; @@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page) goto err_out; /* 32 bit warning -- but not for us :) */ - offset = page->index << PAGE_CACHE_SHIFT; - if (offset < inode->i_size) { + offset = page_offset(page); + if (offset < i_size_read(inode)) { avail = inode->i_size-offset; readlen = min_t(unsigned long, avail, PAGE_SIZE); if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index f3e6b81288ab..74b86d9725a6 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); dentry->d_fsdata = NULL; smb_age_dentry(server, dentry); next = next->next; @@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_child); + dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget_locked(dent); diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index b4fcfa8b55a1..7042e62726a4 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset, { struct dentry *dentry = file->f_dentry; - DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), - count, (page->index << PAGE_CACHE_SHIFT)+offset); + DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count, + ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset); return smb_writepage_sync(dentry->d_inode, page, offset, count); } @@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file) /* We must flush any dirty pages now as we won't be able to write anything after close. mmap can trigger this. "openers" should perhaps include mmap'ers ... */ - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); smb_close(inode); } unlock_kernel(); diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 10b994428fef..6ec88bf59b2d 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -697,8 +697,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) DENTRY_PATH(dentry), (long) inode->i_size, (long) attr->ia_size); - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); error = smb_open(dentry, O_WRONLY); if (error) diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 38ab558835c4..d6baec0f24ad 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -3113,7 +3113,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, LSET(data, 32, SMB_TIME_NO_CHANGE); LSET(data, 40, SMB_UID_NO_CHANGE); LSET(data, 48, SMB_GID_NO_CHANGE); - LSET(data, 56, smb_filetype_from_mode(attr->ia_mode)); + DSET(data, 56, smb_filetype_from_mode(attr->ia_mode)); LSET(data, 60, major); LSET(data, 68, minor); LSET(data, 76, 0); diff --git a/fs/super.c b/fs/super.c index 5a347a4f673a..c177b92419c5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -72,7 +72,7 @@ static struct super_block *alloc_super(void) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); - sema_init(&s->s_lock, 1); + mutex_init(&s->s_lock); down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); @@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - s->s_old_blocksize = block_size(bdev); - sb_set_blocksize(s, s->s_old_blocksize); + sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { up_write(&s->s_umount); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index d36780382176..49bd219275db 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -99,7 +99,7 @@ static int create_dir(struct kobject * k, struct dentry * p, int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; - down(&p->d_inode->i_sem); + mutex_lock(&p->d_inode->i_mutex); *d = lookup_one_len(n, p, strlen(n)); if (!IS_ERR(*d)) { error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); @@ -122,7 +122,7 @@ static int create_dir(struct kobject * k, struct dentry * p, dput(*d); } else error = PTR_ERR(*d); - up(&p->d_inode->i_sem); + mutex_unlock(&p->d_inode->i_mutex); return error; } @@ -246,7 +246,7 @@ static void remove_dir(struct dentry * d) struct dentry * parent = dget(d->d_parent); struct sysfs_dirent * sd; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); d_delete(d); sd = d->d_fsdata; list_del_init(&sd->s_sibling); @@ -257,7 +257,7 @@ static void remove_dir(struct dentry * d) pr_debug(" o %s removing done (%d)\n",d->d_name.name, atomic_read(&d->d_count)); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); dput(parent); } @@ -286,7 +286,7 @@ void sysfs_remove_dir(struct kobject * kobj) return; pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); parent_sd = dentry->d_fsdata; list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) @@ -295,7 +295,7 @@ void sysfs_remove_dir(struct kobject * kobj) sysfs_drop_dentry(sd, dentry); sysfs_put(sd); } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); remove_dir(dentry); /** @@ -318,7 +318,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) down_write(&sysfs_rename_sem); parent = kobj->parent->dentry; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { @@ -334,7 +334,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) error = -EEXIST; dput(new_dentry); } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); up_write(&sysfs_rename_sem); return error; @@ -345,9 +345,9 @@ static int sysfs_dir_open(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); file->private_data = sysfs_new_dirent(parent_sd, NULL); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return file->private_data ? 0 : -ENOMEM; @@ -358,9 +358,9 @@ static int sysfs_dir_close(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct sysfs_dirent * cursor = file->private_data; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); list_del_init(&cursor->s_sibling); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); release_sysfs_dirent(cursor); @@ -436,7 +436,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) { struct dentry * dentry = file->f_dentry; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -444,7 +444,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) if (offset >= 0) break; default: - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -468,7 +468,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) list_add_tail(&cursor->s_sibling, p); } } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -483,4 +483,3 @@ struct file_operations sysfs_dir_operations = { EXPORT_SYMBOL_GPL(sysfs_create_dir); EXPORT_SYMBOL_GPL(sysfs_remove_dir); EXPORT_SYMBOL_GPL(sysfs_rename_dir); - diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 4013d7905e84..d0e3d8495165 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -364,9 +364,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; int error = 0; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return error; } @@ -398,7 +398,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) struct dentry * victim; int res = -ENOENT; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { /* make sure dentry is really there */ @@ -420,7 +420,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) */ dput(victim); } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return res; } @@ -441,22 +441,22 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) struct iattr newattrs; int res = -ENOENT; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { if (victim->d_inode && (victim->d_parent->d_inode == dir->d_inode)) { inode = victim->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; res = notify_change(victim, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } dput(victim); } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return res; } @@ -480,4 +480,3 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) EXPORT_SYMBOL_GPL(sysfs_create_file); EXPORT_SYMBOL_GPL(sysfs_remove_file); EXPORT_SYMBOL_GPL(sysfs_update_file); - diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 970a33f03299..c3133219941c 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -201,7 +201,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) /* * Unhashes the dentry corresponding to given sysfs_dirent - * Called with parent inode's i_sem held. + * Called with parent inode's i_mutex held. */ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) { @@ -232,7 +232,7 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) /* no inode means this hasn't been made visible yet */ return; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) continue; @@ -243,7 +243,5 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) break; } } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); } - - diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index de402fa915f2..e38d6338a20d 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -86,9 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char BUG_ON(!kobj || !kobj->dentry || !name); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); error = sysfs_add_link(dentry, name, target); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return error; } @@ -177,4 +177,3 @@ struct inode_operations sysfs_symlink_inode_operations = { EXPORT_SYMBOL_GPL(sysfs_create_link); EXPORT_SYMBOL_GPL(sysfs_remove_link); - diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 69a085abad6f..cce8b05cba5a 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) offset = (char *)de - kaddr; over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN), - (n<<PAGE_CACHE_SHIFT) | offset, + ((loff_t)n<<PAGE_CACHE_SHIFT) | offset, fs16_to_cpu(SYSV_SB(sb), de->inode), DT_UNKNOWN); if (over) { @@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) } done: - filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; + filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; unlock_kernel(); return 0; } diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 6598a5037ac8..4fae57d9d115 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -41,7 +41,7 @@ #define uint(x) xuint(x) #define xuint(x) __le ## x -extern inline int find_next_one_bit (void * addr, int size, int offset) +static inline int find_next_one_bit (void * addr, int size, int offset) { uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG); int result = offset & ~(BITS_PER_LONG-1); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 4014f17d382e..395e582ee542 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t printk(KERN_ERR "udf: inode_bmap: block < 0\n"); return -1; } - if (!inode) - { - printk(KERN_ERR "udf: inode_bmap: NULL inode\n"); - return -1; - } *extoffset = 0; *elen = 0; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index d0915fba155a..7c10c68902ae 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -491,7 +491,7 @@ int ufs_delete_entry (struct inode * inode, struct ufs_dir_entry * dir, UFSD(("ino %u, reclen %u, namlen %u, name %s\n", fs32_to_cpu(sb, de->d_ino), - fs16to_cpu(sb, de->d_reclen), + fs16_to_cpu(sb, de->d_reclen), ufs_get_de_namlen(sb, de), de->d_name)) while (i < bh->b_size) { diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 54828ebcf1ba..e9a42c711a9e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1275,7 +1275,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head *bh; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -1296,14 +1296,16 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/xattr.c b/fs/xattr.c index bcc2156d4d28..80eca7d3d69f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -19,6 +19,149 @@ #include <linux/fsnotify.h> #include <asm/uaccess.h> + +/* + * Check permissions for extended attribute access. This is a bit complicated + * because different namespaces have very different rules. + */ +static int +xattr_permission(struct inode *inode, const char *name, int mask) +{ + /* + * We can never set or remove an extended attribute on a read-only + * filesystem or on an immutable / append-only inode. + */ + if (mask & MAY_WRITE) { + if (IS_RDONLY(inode)) + return -EROFS; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + } + + /* + * No restriction for security.* and system.* from the VFS. Decision + * on these is left to the underlying filesystem / security module. + */ + if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || + !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return 0; + + /* + * The trusted.* namespace can only accessed by a privilegued user. + */ + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + + if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + } + + return permission(inode, mask, NULL); +} + +int +vfs_setxattr(struct dentry *dentry, char *name, void *value, + size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = xattr_permission(inode, name, MAY_WRITE); + if (error) + return error; + + mutex_lock(&inode->i_mutex); + error = security_inode_setxattr(dentry, name, value, size, flags); + if (error) + goto out; + error = -EOPNOTSUPP; + if (inode->i_op->setxattr) { + error = inode->i_op->setxattr(dentry, name, value, size, flags); + if (!error) { + fsnotify_xattr(dentry); + security_inode_post_setxattr(dentry, name, value, + size, flags); + } + } else if (!strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN)) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + error = security_inode_setsecurity(inode, suffix, value, + size, flags); + if (!error) + fsnotify_xattr(dentry); + } +out: + mutex_unlock(&inode->i_mutex); + return error; +} +EXPORT_SYMBOL_GPL(vfs_setxattr); + +ssize_t +vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = xattr_permission(inode, name, MAY_READ); + if (error) + return error; + + error = security_inode_getxattr(dentry, name); + if (error) + return error; + + if (inode->i_op->getxattr) + error = inode->i_op->getxattr(dentry, name, value, size); + else + error = -EOPNOTSUPP; + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN)) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + int ret = security_inode_getsecurity(inode, suffix, value, + size, error); + /* + * Only overwrite the return value if a security module + * is actually active. + */ + if (ret != -EOPNOTSUPP) + error = ret; + } + + return error; +} +EXPORT_SYMBOL_GPL(vfs_getxattr); + +int +vfs_removexattr(struct dentry *dentry, char *name) +{ + struct inode *inode = dentry->d_inode; + int error; + + if (!inode->i_op->removexattr) + return -EOPNOTSUPP; + + error = xattr_permission(inode, name, MAY_WRITE); + if (error) + return error; + + error = security_inode_removexattr(dentry, name); + if (error) + return error; + + mutex_lock(&inode->i_mutex); + error = inode->i_op->removexattr(dentry, name); + mutex_unlock(&inode->i_mutex); + + if (!error) + fsnotify_xattr(dentry); + return error; +} +EXPORT_SYMBOL_GPL(vfs_removexattr); + + /* * Extended attribute SET operations */ @@ -51,29 +194,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } } - down(&d->d_inode->i_sem); - error = security_inode_setxattr(d, kname, kvalue, size, flags); - if (error) - goto out; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { - error = d->d_inode->i_op->setxattr(d, kname, kvalue, - size, flags); - if (!error) { - fsnotify_xattr(d); - security_inode_post_setxattr(d, kname, kvalue, - size, flags); - } - } else if (!strncmp(kname, XATTR_SECURITY_PREFIX, - sizeof XATTR_SECURITY_PREFIX - 1)) { - const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; - error = security_inode_setsecurity(d->d_inode, suffix, kvalue, - size, flags); - if (!error) - fsnotify_xattr(d); - } -out: - up(&d->d_inode->i_sem); + error = vfs_setxattr(d, kname, kvalue, size, flags); kfree(kvalue); return error; } @@ -147,22 +268,7 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) return -ENOMEM; } - error = security_inode_getxattr(d, kname); - if (error) - goto out; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->getxattr) - error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); - - if (!strncmp(kname, XATTR_SECURITY_PREFIX, - sizeof XATTR_SECURITY_PREFIX - 1)) { - const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; - int rv = security_inode_getsecurity(d->d_inode, suffix, kvalue, - size, error); - /* Security module active: overwrite error value */ - if (rv != -EOPNOTSUPP) - error = rv; - } + error = vfs_getxattr(d, kname, kvalue, size); if (error > 0) { if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; @@ -171,7 +277,6 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) than XATTR_SIZE_MAX bytes. Not possible. */ error = -E2BIG; } -out: kfree(kvalue); return error; } @@ -318,19 +423,7 @@ removexattr(struct dentry *d, char __user *name) if (error < 0) return error; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->removexattr) { - error = security_inode_removexattr(d, kname); - if (error) - goto out; - down(&d->d_inode->i_sem); - error = d->d_inode->i_op->removexattr(d, kname); - up(&d->d_inode->i_sem); - if (!error) - fsnotify_xattr(d); - } -out: - return error; + return vfs_removexattr(d, kname); } asmlinkage long diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h index ce773d89a923..d3369b6ca168 100644 --- a/fs/xfs/linux-2.6/mutex.h +++ b/fs/xfs/linux-2.6/mutex.h @@ -19,7 +19,7 @@ #define __XFS_SUPPORT_MUTEX_H__ #include <linux/spinlock.h> -#include <asm/semaphore.h> +#include <linux/mutex.h> /* * Map the mutex'es from IRIX to Linux semaphores. @@ -28,12 +28,8 @@ * callers. */ #define MUTEX_DEFAULT 0x0 -typedef struct semaphore mutex_t; -#define mutex_init(lock, type, name) sema_init(lock, 1) -#define mutex_destroy(lock) sema_init(lock, -99) -#define mutex_lock(lock, num) down(lock) -#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1) -#define mutex_unlock(lock) up(lock) +typedef struct mutex mutex_t; +//#define mutex_destroy(lock) do{}while(0) #endif /* __XFS_SUPPORT_MUTEX_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index f89340c61bf2..4fa4b1a5187e 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -79,8 +79,7 @@ fs_flushinval_pages( struct inode *ip = LINVFS_GET_IP(vp); if (VN_CACHED(vp)) { - filemap_fdatawrite(ip->i_mapping); - filemap_fdatawait(ip->i_mapping); + filemap_write_and_wait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, first); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index b78b5eb9e96c..f98c5be3dfe7 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -530,6 +530,8 @@ xfs_attrmulti_attr_set( char *kbuf; int error = EFAULT; + if (IS_RDONLY(&vp->v_inode)) + return -EROFS; if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) return EPERM; if (len > XATTR_SIZE_MAX) @@ -557,6 +559,9 @@ xfs_attrmulti_attr_remove( { int error; + + if (IS_RDONLY(&vp->v_inode)) + return -EROFS; if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) return EPERM; diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index c83ae15bb0e6..a7c9ba1a9f7b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -19,7 +19,6 @@ #include <linux/compat.h> #include <linux/init.h> #include <linux/ioctl.h> -#include <linux/ioctl32.h> #include <linux/syscalls.h> #include <linux/types.h> #include <linux/fs.h> diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 14215a7db59f..97fb1470cf28 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -54,6 +54,9 @@ #include <linux/xattr.h> #include <linux/namei.h> +#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \ + (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME)) + /* * Change the requested timestamp in the given inode. * We don't lock across timestamp updates, and we don't log them but @@ -203,7 +206,7 @@ validate_fields( ip->i_nlink = va.va_nlink; ip->i_blocks = va.va_nblocks; - /* we're under i_sem so i_size can't change under us */ + /* we're under i_mutex so i_size can't change under us */ if (i_size_read(ip) != va.va_size) i_size_write(ip, va.va_size); } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 279e9bc92aba..885dfafeabee 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -254,7 +254,7 @@ xfs_read( } if (unlikely(ioflags & IO_ISDIRECT)) - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && @@ -286,7 +286,7 @@ xfs_read( unlock_isem: if (unlikely(ioflags & IO_ISDIRECT)) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } @@ -655,7 +655,7 @@ relock: iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); } else { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; @@ -686,7 +686,7 @@ start: int dmflags = FILP_DELAY_FLAG(file); if (need_isem) - dmflags |= DM_FLAGS_ISEM; + dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, @@ -713,7 +713,7 @@ start: } if (likely(!(ioflags & IO_INVIS))) { - inode_update_time(inode, 1); + file_update_time(file); xfs_ichgtime_fast(xip, inode, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } @@ -772,7 +772,7 @@ retry: if (need_isem) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; @@ -817,14 +817,14 @@ retry: xfs_rwunlock(bdp, locktype); if (need_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_nounlocks; if (need_isem) - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; ret = 0; @@ -926,7 +926,7 @@ retry: xfs_rwunlock(bdp, locktype); if (need_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); error = sync_page_range(inode, mapping, pos, ret); if (!error) @@ -938,7 +938,7 @@ retry: xfs_rwunlock(bdp, locktype); out_unlock_isem: if (need_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out_nounlocks: return -error; } diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 00b5043dfa5a..772ac48329ea 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -104,7 +104,7 @@ xfs_qm_dqinit( */ if (brandnewdquot) { dqp->dq_flnext = dqp->dq_flprev = dqp; - mutex_init(&dqp->q_qlock, MUTEX_DEFAULT, "xdq"); + mutex_init(&dqp->q_qlock); initnsema(&dqp->q_flock, 1, "fdq"); sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); @@ -1382,7 +1382,7 @@ void xfs_dqlock( xfs_dquot_t *dqp) { - mutex_lock(&(dqp->q_qlock), PINOD); + mutex_lock(&(dqp->q_qlock)); } void diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5328a2937127..bb6991a7a617 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -167,7 +167,7 @@ xfs_Gqm_init(void) xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; #ifdef DEBUG - mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk"); + xfs_mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk"); #endif return xqm; } @@ -1166,7 +1166,7 @@ xfs_qm_init_quotainfo( qinf->qi_dqreclaims = 0; /* mutex used to serialize quotaoffs */ - mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff"); + mutex_init(&qinf->qi_quotaofflock); /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); @@ -1285,7 +1285,7 @@ xfs_qm_list_init( char *str, int n) { - mutex_init(&list->qh_lock, MUTEX_DEFAULT, str); + mutex_init(&list->qh_lock); list->qh_next = NULL; list->qh_version = 0; list->qh_nelems = 0; @@ -2762,7 +2762,7 @@ STATIC void xfs_qm_freelist_init(xfs_frlist_t *ql) { ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql; - mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf"); + mutex_init(&ql->qh_lock); ql->qh_version = 0; ql->qh_nelems = 0; } @@ -2772,7 +2772,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql) { xfs_dquot_t *dqp, *nextdqp; - mutex_lock(&ql->qh_lock, PINOD); + mutex_lock(&ql->qh_lock); for (dqp = ql->qh_next; dqp != (xfs_dquot_t *)ql; ) { xfs_dqlock(dqp); diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 12da259f2fcb..4568deb6da86 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct { #define XFS_QM_IWARNLIMIT 5 #define XFS_QM_RTBWARNLIMIT 5 -#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock, PINOD)) +#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock)) #define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock)) #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index d9d2993de435..90402a1c3983 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -363,7 +363,7 @@ xfs_qm_init(void) KERN_INFO "SGI XFS Quota Management subsystem\n"; printk(message); - mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock"); + mutex_init(&xfs_Gqm_lock); vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs); xfs_qm_init_procfs(); } diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 24690e1af659..86a1d09f48d5 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -233,7 +233,7 @@ xfs_qm_scall_quotaoff( */ ASSERT(mp->m_quotainfo); if (mp->m_quotainfo) - mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); + mutex_lock(&(XFS_QI_QOFFLOCK(mp))); ASSERT(mp->m_quotainfo); @@ -508,7 +508,7 @@ xfs_qm_scall_quotaon( /* * Switch on quota enforcement in core. */ - mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); + mutex_lock(&(XFS_QI_QOFFLOCK(mp))); mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); @@ -617,7 +617,7 @@ xfs_qm_scall_setqlim( * a quotaoff from happening). (XXXThis doesn't currently happen * because we take the vfslock before calling xfs_qm_sysent). */ - mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); + mutex_lock(&(XFS_QI_QOFFLOCK(mp))); /* * Get the dquot (locked), and join it to the transaction. @@ -1426,7 +1426,7 @@ xfs_qm_internalqcheck( xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); XFS_bflush(mp->m_ddev_targp); - mutex_lock(&qcheck_lock, PINOD); + mutex_lock(&qcheck_lock); /* There should be absolutely no quota activity while this is going on. */ qmtest_udqtab = kmem_zalloc(qmtest_hashmask * diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 7a9f3beb818c..b7ddd04aae32 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -51,7 +51,7 @@ #define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) #define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) -#define XQMLCK(h) (mutex_lock(&((h)->qh_lock), PINOD)) +#define XQMLCK(h) (mutex_lock(&((h)->qh_lock))) #define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock))) #ifdef DEBUG struct xfs_dqhash; diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c index 70ce40914c8a..69ec4f540c3a 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/support/uuid.c @@ -24,7 +24,7 @@ static uuid_t *uuid_table; void uuid_init(void) { - mutex_init(&uuid_monitor, MUTEX_DEFAULT, "uuid_monitor"); + mutex_init(&uuid_monitor); } /* @@ -94,7 +94,7 @@ uuid_table_insert(uuid_t *uuid) { int i, hole; - mutex_lock(&uuid_monitor, PVFS); + mutex_lock(&uuid_monitor); for (i = 0, hole = -1; i < uuid_table_size; i++) { if (uuid_is_nil(&uuid_table[i])) { hole = i; @@ -122,7 +122,7 @@ uuid_table_remove(uuid_t *uuid) { int i; - mutex_lock(&uuid_monitor, PVFS); + mutex_lock(&uuid_monitor); for (i = 0; i < uuid_table_size; i++) { if (uuid_is_nil(&uuid_table[i])) continue; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 5484eeb460c8..1a11c2b51701 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -117,11 +117,6 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen, ip->i_d.di_anextents == 0)) return(ENOATTR); - if (!(flags & (ATTR_KERNACCESS|ATTR_SECURE))) { - if ((error = xfs_iaccess(ip, S_IRUSR, cred))) - return(XFS_ERROR(error)); - } - /* * Fill in the arg structure for this request. */ @@ -425,7 +420,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f struct cred *cred) { xfs_inode_t *dp; - int namelen, error; + int namelen; namelen = strlen(name); if (namelen >= MAXNAMELEN) @@ -437,14 +432,6 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return (EIO); - xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!(flags & ATTR_SECURE) && - (error = xfs_iaccess(dp, S_IWUSR, cred))) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return(XFS_ERROR(error)); - } - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); } @@ -579,7 +566,7 @@ int xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred) { xfs_inode_t *dp; - int namelen, error; + int namelen; namelen = strlen(name); if (namelen >= MAXNAMELEN) @@ -592,11 +579,7 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred) return (EIO); xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!(flags & ATTR_SECURE) && - (error = xfs_iaccess(dp, S_IWUSR, cred))) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return(XFS_ERROR(error)); - } else if (XFS_IFORK_Q(dp) == 0 || + if (XFS_IFORK_Q(dp) == 0 || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && dp->i_d.di_anextents == 0)) { xfs_iunlock(dp, XFS_ILOCK_SHARED); @@ -668,12 +651,6 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags, return (EIO); xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!(flags & ATTR_SECURE) && - (error = xfs_iaccess(dp, S_IRUSR, cred))) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return(XFS_ERROR(error)); - } - /* * Decide on what work routines to call based on the inode size. */ diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 864bf6955689..b4c7f2bc55a0 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -152,7 +152,7 @@ typedef enum { #define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */ #define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */ -#define DM_FLAGS_ISEM 0x004 /* thread holds i_sem */ +#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */ #define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */ #define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ @@ -161,21 +161,21 @@ typedef enum { */ #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_ISEM : 0) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) + DM_FLAGS_IMUX : 0) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) #endif #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22)) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) + DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) #endif #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - 0 : DM_FLAGS_ISEM) -#define DM_SEM_FLAG_WR (DM_FLAGS_ISEM) + 0 : DM_FLAGS_IMUX) +#define DM_SEM_FLAG_WR (DM_FLAGS_IMUX) #endif diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 158829ca56f6..f40d4391fcfc 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -30,13 +30,7 @@ * By comparing each compnent, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ -static -#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96)) -__attribute__((unused)) /* gcc 2.95, 2.96 miscompile this when inlined */ -#else -__inline__ -#endif -xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) +static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) { if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2)) return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 541d5dd474be..303af86739bf 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -117,7 +117,7 @@ xfs_mount_init(void) AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); spinlock_init(&mp->m_sb_lock, "xfs_sb"); - mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock"); + mutex_init(&mp->m_ilock); initnsema(&mp->m_growlock, 1, "xfs_grow"); /* * Initialize the AIL. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 08b2e0a5d807..3432fd5a3986 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -533,7 +533,7 @@ typedef struct xfs_mod_sb { int msb_delta; /* Change to make to specified field */ } xfs_mod_sb_t; -#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock), PINOD) +#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) #define XFS_SB_LOCK(mp) mutex_spinlock(&(mp)->m_sb_lock) #define XFS_SB_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_sb_lock,(s)) |