summaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/export.c161
-rw-r--r--fs/nfsd/nfs2acl.c5
-rw-r--r--fs/nfsd/nfs3acl.c3
-rw-r--r--fs/nfsd/nfs3proc.c18
-rw-r--r--fs/nfsd/nfs3xdr.c56
-rw-r--r--fs/nfsd/nfs4acl.c711
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4idmap.c3
-rw-r--r--fs/nfsd/nfs4proc.c34
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/nfsd/nfs4xdr.c234
-rw-r--r--fs/nfsd/nfsctl.c189
-rw-r--r--fs/nfsd/nfsproc.c12
-rw-r--r--fs/nfsd/nfssvc.c335
-rw-r--r--fs/nfsd/nfsxdr.c45
-rw-r--r--fs/nfsd/vfs.c94
17 files changed, 1206 insertions, 706 deletions
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 01bc68c628ad..e13fa23bd108 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
static struct cache_head *export_table[EXPORT_HASHMAX];
+static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
+{
+ int i;
+
+ for (i = 0; i < fsloc->locations_count; i++) {
+ kfree(fsloc->locations[i].path);
+ kfree(fsloc->locations[i].hosts);
+ }
+ kfree(fsloc->locations);
+}
+
static void svc_export_put(struct kref *ref)
{
struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
dput(exp->ex_dentry);
mntput(exp->ex_mnt);
auth_domain_put(exp->ex_client);
+ kfree(exp->ex_path);
+ nfsd4_fslocs_free(&exp->ex_fslocs);
kfree(exp);
}
@@ -370,7 +383,7 @@ static int check_export(struct inode *inode, int flags)
*/
if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
!(flags & NFSEXP_FSID)) {
- dprintk("exp_export: export of non-dev fs without fsid");
+ dprintk("exp_export: export of non-dev fs without fsid\n");
return -EINVAL;
}
if (!inode->i_sb->s_export_op) {
@@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags)
}
+#ifdef CONFIG_NFSD_V4
+
+static int
+fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
+{
+ int len;
+ int migrated, i, err;
+
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len != 5 || memcmp(buf, "fsloc", 5))
+ return 0;
+
+ /* listsize */
+ err = get_int(mesg, &fsloc->locations_count);
+ if (err)
+ return err;
+ if (fsloc->locations_count > MAX_FS_LOCATIONS)
+ return -EINVAL;
+ if (fsloc->locations_count == 0)
+ return 0;
+
+ fsloc->locations = kzalloc(fsloc->locations_count
+ * sizeof(struct nfsd4_fs_location), GFP_KERNEL);
+ if (!fsloc->locations)
+ return -ENOMEM;
+ for (i=0; i < fsloc->locations_count; i++) {
+ /* colon separated host list */
+ err = -EINVAL;
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].hosts)
+ goto out_free_all;
+ err = -EINVAL;
+ /* slash separated path component list */
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].path)
+ goto out_free_all;
+ }
+ /* migrated */
+ err = get_int(mesg, &migrated);
+ if (err)
+ goto out_free_all;
+ err = -EINVAL;
+ if (migrated < 0 || migrated > 1)
+ goto out_free_all;
+ fsloc->migrated = migrated;
+ return 0;
+out_free_all:
+ nfsd4_fslocs_free(fsloc);
+ return err;
+}
+
+#else /* CONFIG_NFSD_V4 */
+static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; }
+#endif
+
static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
@@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
int an_int;
nd.dentry = NULL;
+ exp.ex_path = NULL;
if (mesg[mlen-1] != '\n')
return -EINVAL;
@@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_client = dom;
exp.ex_mnt = nd.mnt;
exp.ex_dentry = nd.dentry;
+ exp.ex_path = kstrdup(buf, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!exp.ex_path)
+ goto out;
/* expiry */
err = -EINVAL;
@@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (exp.h.expiry_time == 0)
goto out;
+ /* fs locations */
+ exp.ex_fslocs.locations = NULL;
+ exp.ex_fslocs.locations_count = 0;
+ exp.ex_fslocs.migrated = 0;
+
/* flags */
err = get_int(&mesg, &an_int);
if (err == -ENOENT)
@@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = check_export(nd.dentry->d_inode, exp.ex_flags);
if (err) goto out;
+
+ err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
+ if (err)
+ goto out;
}
expp = svc_export_lookup(&exp);
@@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
else
exp_put(expp);
out:
+ kfree(exp.ex_path);
if (nd.dentry)
path_release(&nd);
out_no_path:
@@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
return err;
}
-static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong);
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
static int svc_export_show(struct seq_file *m,
struct cache_detail *cd,
@@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m,
seq_putc(m, '(');
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
- exp_flags(m, exp->ex_flags, exp->ex_fsid,
- exp->ex_anon_uid, exp->ex_anon_gid);
+ exp_flags(m, exp->ex_flags, exp->ex_fsid,
+ exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
seq_puts(m, ")\n");
return 0;
}
@@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_client = item->ex_client;
new->ex_dentry = dget(item->ex_dentry);
new->ex_mnt = mntget(item->ex_mnt);
+ new->ex_path = NULL;
+ new->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = 0;
}
static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
new->ex_anon_uid = item->ex_anon_uid;
new->ex_anon_gid = item->ex_anon_gid;
new->ex_fsid = item->ex_fsid;
+ new->ex_path = item->ex_path;
+ item->ex_path = NULL;
+ new->ex_fslocs.locations = item->ex_fslocs.locations;
+ item->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
+ item->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = item->ex_fslocs.migrated;
+ item->ex_fslocs.migrated = 0;
}
static struct cache_head *svc_export_alloc(void)
@@ -1048,36 +1152,28 @@ int
exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
struct cache_req *creq)
{
- struct svc_expkey *fsid_key;
struct svc_export *exp;
int rv;
u32 fsidv[2];
mk_fsid_v1(fsidv, 0);
- fsid_key = exp_find_key(clp, 1, fsidv, creq);
- if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN)
+ exp = exp_find(clp, 1, fsidv, creq);
+ if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
return nfserr_dropit;
- if (!fsid_key || IS_ERR(fsid_key))
- return nfserr_perm;
-
- exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
if (exp == NULL)
- rv = nfserr_perm;
+ return nfserr_perm;
else if (IS_ERR(exp))
- rv = nfserrno(PTR_ERR(exp));
- else {
- rv = fh_compose(fhp, exp,
- fsid_key->ek_dentry, NULL);
- exp_put(exp);
- }
- cache_put(&fsid_key->h, &svc_expkey_cache);
+ return nfserrno(PTR_ERR(exp));
+ rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
+ exp_put(exp);
return rv;
}
/* Iterator */
static void *e_start(struct seq_file *m, loff_t *pos)
+ __acquires(svc_export_cache.hash_lock)
{
loff_t n = *pos;
unsigned hash, export;
@@ -1086,7 +1182,7 @@ static void *e_start(struct seq_file *m, loff_t *pos)
exp_readlock();
read_lock(&svc_export_cache.hash_lock);
if (!n--)
- return (void *)1;
+ return SEQ_START_TOKEN;
hash = n >> 32;
export = n & ((1LL<<32) - 1);
@@ -1110,7 +1206,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
struct cache_head *ch = p;
int hash = (*pos >> 32);
- if (p == (void *)1)
+ if (p == SEQ_START_TOKEN)
hash = 0;
else if (ch->next == NULL) {
hash++;
@@ -1131,6 +1227,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
}
static void e_stop(struct seq_file *m, void *p)
+ __releases(svc_export_cache.hash_lock)
{
read_unlock(&svc_export_cache.hash_lock);
exp_readunlock();
@@ -1156,7 +1253,8 @@ static struct flags {
{ 0, {"", ""}}
};
-static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong)
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
{
int first = 0;
struct flags *flg;
@@ -1172,21 +1270,34 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t
seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
if (anong != (gid_t)-2 && anong != (0x10000-2))
seq_printf(m, "%sanongid=%d", first++?",":"", anong);
+ if (fsloc && fsloc->locations_count > 0) {
+ char *loctype = (fsloc->migrated) ? "refer" : "replicas";
+ int i;
+
+ seq_printf(m, "%s%s=", first++?",":"", loctype);
+ seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
+ for (i = 1; i < fsloc->locations_count; i++) {
+ seq_putc(m, ';');
+ seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\");
+ }
+ }
}
static int e_show(struct seq_file *m, void *p)
{
struct cache_head *cp = p;
struct svc_export *exp = container_of(cp, struct svc_export, h);
- svc_client *clp;
- if (p == (void *)1) {
+ if (p == SEQ_START_TOKEN) {
seq_puts(m, "# Version 1.1\n");
seq_puts(m, "# Path Client(Flags) # IPs\n");
return 0;
}
- clp = exp->ex_client;
cache_get(&exp->h);
if (cache_check(&svc_export_cache, &exp->h, NULL))
return 0;
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fc95c4df6693..9187755661df 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,5 +1,5 @@
/*
- * linux/fs/nfsd/nfsacl.c
+ * linux/fs/nfsd/nfs2acl.c
*
* Process version 2 NFSACL requests.
*
@@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = w;
while (w > 0) {
- if (!svc_take_res_page(rqstp))
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
return 0;
w -= PAGE_SIZE;
}
@@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 1,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 16e10c170aed..d4bdc00c1169 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = w;
while (w > 0) {
- if (!svc_take_res_page(rqstp))
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
return 0;
w -= PAGE_SIZE;
}
@@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 1,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f61142afea44..a5ebc7dbb384 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
struct nfsd3_readres *resp)
{
int nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
SVCFH_fmt(&argp->fh),
@@ -172,15 +173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
*/
resp->count = argp->count;
- if (NFSSVC_MAXBLKSIZE < resp->count)
- resp->count = NFSSVC_MAXBLKSIZE;
+ if (max_blocksize < resp->count)
+ resp->count = max_blocksize;
svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh, NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode;
@@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh, NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
argp->len,
&resp->committed);
resp->count = argp->count;
@@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
struct nfsd3_fsinfores *resp)
{
int nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: FSINFO(3) %s\n",
SVCFH_fmt(&argp->fh));
- resp->f_rtmax = NFSSVC_MAXBLKSIZE;
- resp->f_rtpref = NFSSVC_MAXBLKSIZE;
+ resp->f_rtmax = max_blocksize;
+ resp->f_rtpref = max_blocksize;
resp->f_rtmult = PAGE_SIZE;
- resp->f_wtmax = NFSSVC_MAXBLKSIZE;
- resp->f_wtpref = NFSSVC_MAXBLKSIZE;
+ resp->f_wtmax = max_blocksize;
+ resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE;
resp->f_dtpref = PAGE_SIZE;
resp->f_maxfilesize = ~(u32) 0;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 243d94b9653a..247d518248bf 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
{
unsigned int len;
int v,pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
@@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
len = args->count = ntohl(*p++);
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > max_blocksize)
+ len = max_blocksize;
/* set up the kvec */
v=0;
while (len > 0) {
- pn = rqstp->rq_resused;
- svc_take_page(rqstp);
- args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
- args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
- len -= args->vec[v].iov_len;
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
v++;
}
args->vlen = v;
@@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_writeargs *args)
{
unsigned int len, v, hdr;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
@@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_arg.len - hdr < len)
return 0;
- args->vec[0].iov_base = (void*)p;
- args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > max_blocksize)
+ len = max_blocksize;
v= 0;
- while (len > args->vec[v].iov_len) {
- len -= args->vec[v].iov_len;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
v++;
- args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
- args->vec[v].iov_len = PAGE_SIZE;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
- args->vec[v].iov_len = len;
+ rqstp->rq_vec[v].iov_len = len;
args->vlen = v+1;
- return args->count == args->len && args->vec[0].iov_len > 0;
+ return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
}
int
@@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
* This page appears in the rq_res.pages list, but as pages_len is always
* 0, it won't get in the way
*/
- svc_take_page(rqstp);
len = ntohl(*p++);
if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
return 0;
- args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->tname = new =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
args->tlen = len;
/* first copy and check from the first page */
old = (char*)p;
@@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
{
if (!(p = decode_fh(p, &args->fh)))
return 0;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_readdirargs *args)
{
int len, pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh)))
return 0;
@@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
- len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE :
+ len = (args->count > max_blocksize) ? max_blocksize :
args->count;
args->count = len;
while (len > 0) {
- pn = rqstp->rq_resused;
- svc_take_page(rqstp);
+ pn = rqstp->rq_resused++;
if (!args->buffer)
args->buffer = page_address(rqstp->rq_respages[pn]);
len -= PAGE_SIZE;
@@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->len;
if (resp->len & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
@@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = (resp->count) << 2;
/* add the 'tail' to the end of the 'head' page - page 0. */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p++ = 0; /* no more entries */
*p++ = htonl(resp->common.err == nfserr_eof);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index edb107e61b91..5d94555cdc83 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -63,6 +63,8 @@
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
| NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
+#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP)
+
#define MASK_EQUAL(mask1, mask2) \
( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
@@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags)
/* XXX: modify functions to return NFS errors; they're only ever
* used by nfs code, after all.... */
-static int
-mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
+/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
+ * side of being more restrictive, so the mode bit mapping below is
+ * pessimistic. An optimistic version would be needed to handle DENY's,
+ * but we espect to coalesce all ALLOWs and DENYs before mapping to mode
+ * bits. */
+
+static void
+low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
{
- u32 ignore = 0;
+ u32 write_mode = NFS4_WRITE_MODE;
- if (!(flags & NFS4_ACL_DIR))
- ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */
- perm |= ignore;
+ if (flags & NFS4_ACL_DIR)
+ write_mode |= NFS4_ACE_DELETE_CHILD;
*mode = 0;
if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
*mode |= ACL_READ;
- if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE)
+ if ((perm & write_mode) == write_mode)
*mode |= ACL_WRITE;
if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
*mode |= ACL_EXECUTE;
- if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags)))
- return -EINVAL;
- return 0;
}
struct ace_container {
@@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl)
return;
}
-static int
-write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
- struct posix_acl_entry **pace, short tag, unsigned int flags)
-{
- struct posix_acl_entry *this = *pace;
-
- if (*pace == pacl->a_entries + pacl->a_count)
- return -EINVAL; /* fell off the end */
- (*pace)++;
- this->e_tag = tag;
- if (tag == ACL_USER_OBJ)
- flags |= NFS4_ACL_OWNER;
- if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags))
- return -EINVAL;
- this->e_id = (tag == ACL_USER || tag == ACL_GROUP ?
- ace->who : ACL_UNDEFINED_ID);
- return 0;
-}
-
-static struct nfs4_ace *
-get_next_v4_ace(struct list_head **p, struct list_head *head)
-{
- struct nfs4_ace *ace;
-
- *p = (*p)->next;
- if (*p == head)
- return NULL;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
-
- return ace;
-}
-
int
nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
struct posix_acl **dpacl, unsigned int flags)
@@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
goto out;
error = nfs4_acl_split(acl, dacl);
- if (error < 0)
+ if (error)
goto out_acl;
- if (pacl != NULL) {
- if (acl->naces == 0) {
- error = -ENODATA;
- goto try_dpacl;
- }
-
- *pacl = _nfsv4_to_posix_one(acl, flags);
- if (IS_ERR(*pacl)) {
- error = PTR_ERR(*pacl);
- *pacl = NULL;
- goto out_acl;
- }
+ *pacl = _nfsv4_to_posix_one(acl, flags);
+ if (IS_ERR(*pacl)) {
+ error = PTR_ERR(*pacl);
+ *pacl = NULL;
+ goto out_acl;
}
-try_dpacl:
- if (dpacl != NULL) {
- if (dacl->naces == 0) {
- if (pacl == NULL || *pacl == NULL)
- error = -ENODATA;
- goto out_acl;
- }
-
- error = 0;
- *dpacl = _nfsv4_to_posix_one(dacl, flags);
- if (IS_ERR(*dpacl)) {
- error = PTR_ERR(*dpacl);
- *dpacl = NULL;
- goto out_acl;
- }
+ *dpacl = _nfsv4_to_posix_one(dacl, flags);
+ if (IS_ERR(*dpacl)) {
+ error = PTR_ERR(*dpacl);
+ *dpacl = NULL;
}
-
out_acl:
- if (error && pacl) {
+ if (error) {
posix_acl_release(*pacl);
*pacl = NULL;
}
@@ -429,349 +382,311 @@ out:
return error;
}
+/*
+ * While processing the NFSv4 ACE, this maintains bitmasks representing
+ * which permission bits have been allowed and which denied to a given
+ * entity: */
+struct posix_ace_state {
+ u32 allow;
+ u32 deny;
+};
+
+struct posix_user_ace_state {
+ uid_t uid;
+ struct posix_ace_state perms;
+};
+
+struct posix_ace_state_array {
+ int n;
+ struct posix_user_ace_state aces[];
+};
+
+/*
+ * While processing the NFSv4 ACE, this maintains the partial permissions
+ * calculated so far: */
+
+struct posix_acl_state {
+ struct posix_ace_state owner;
+ struct posix_ace_state group;
+ struct posix_ace_state other;
+ struct posix_ace_state everyone;
+ struct posix_ace_state mask; /* Deny unused in this case */
+ struct posix_ace_state_array *users;
+ struct posix_ace_state_array *groups;
+};
+
static int
-same_who(struct nfs4_ace *a, struct nfs4_ace *b)
+init_state(struct posix_acl_state *state, int cnt)
{
- return a->whotype == b->whotype &&
- (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who);
+ int alloc;
+
+ memset(state, 0, sizeof(struct posix_acl_state));
+ /*
+ * In the worst case, each individual acl could be for a distinct
+ * named user or group, but we don't no which, so we allocate
+ * enough space for either:
+ */
+ alloc = sizeof(struct posix_ace_state_array)
+ + cnt*sizeof(struct posix_ace_state);
+ state->users = kzalloc(alloc, GFP_KERNEL);
+ if (!state->users)
+ return -ENOMEM;
+ state->groups = kzalloc(alloc, GFP_KERNEL);
+ if (!state->groups) {
+ kfree(state->users);
+ return -ENOMEM;
+ }
+ return 0;
}
-static int
-complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny,
- unsigned int flags)
-{
- int ignore = 0;
- if (!(flags & NFS4_ACL_DIR))
- ignore |= NFS4_ACE_DELETE_CHILD;
- return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
- ignore|deny->access_mask) &&
- allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
- deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
- allow->flag == deny->flag &&
- same_who(allow, deny);
+static void
+free_state(struct posix_acl_state *state) {
+ kfree(state->users);
+ kfree(state->groups);
}
-static inline int
-user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
-
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_USER_OBJ)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- error = 0;
-out:
- return error;
+ state->mask.allow |= astate->allow;
}
-static inline int
-users_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
-{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
+/*
+ * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
+ * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
+ * to traditional read/write/execute permissions.
+ *
+ * It's problematic to reject acls that use certain mode bits, because it
+ * places the burden on users to learn the rules about which bits one
+ * particular server sets, without giving the user a lot of help--we return an
+ * error that could mean any number of different things. To make matters
+ * worse, the problematic bits might be introduced by some application that's
+ * automatically mapping from some other acl model.
+ *
+ * So wherever possible we accept anything, possibly erring on the side of
+ * denying more permissions than necessary.
+ *
+ * However we do reject *explicit* DENY's of a few bits representing
+ * permissions we could never deny:
+ */
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- while (ace2type(ace) == ACL_USER) {
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- goto out;
- if (*mask_ace &&
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_USER, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- if ((*mask_ace)->flag != ace2->flag ||
- !same_who(*mask_ace, ace2))
- goto out;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- }
- error = 0;
-out:
- return error;
+static inline int check_deny(u32 mask, int isowner)
+{
+ if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
+ return -EINVAL;
+ if (!isowner)
+ return 0;
+ if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
+ return -EINVAL;
+ return 0;
}
-static inline int
-group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static struct posix_acl *
+posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
- struct ace_container *ac;
- struct list_head group_l;
-
- INIT_LIST_HEAD(&group_l);
- ace = list_entry(*p, struct nfs4_ace, l_ace);
-
- /* group owner (mask and allow aces) */
+ struct posix_acl_entry *pace;
+ struct posix_acl *pacl;
+ int nace;
+ int i, error = 0;
- if (pacl->a_count != 3) {
- /* then the group owner should be preceded by mask */
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- goto out;
- if (*mask_ace &&
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
+ nace = 4 + state->users->n + state->groups->n;
+ pacl = posix_acl_alloc(nace, GFP_KERNEL);
+ if (!pacl)
+ return ERR_PTR(-ENOMEM);
- if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace))
- goto out;
+ pace = pacl->a_entries;
+ pace->e_tag = ACL_USER_OBJ;
+ error = check_deny(state->owner.deny, 1);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+
+ for (i=0; i < state->users->n; i++) {
+ pace++;
+ pace->e_tag = ACL_USER;
+ error = check_deny(state->users->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->users->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->users->aces[i].uid;
+ add_to_mask(state, &state->users->aces[i].perms);
}
- if (ace2type(ace) != ACL_GROUP_OBJ)
- goto out;
-
- ac = kmalloc(sizeof(*ac), GFP_KERNEL);
- error = -ENOMEM;
- if (ac == NULL)
- goto out;
- ac->ace = ace;
- list_add_tail(&ac->ace_l, &group_l);
-
- error = -EINVAL;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
-
- error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags);
- if (error < 0)
- goto out;
-
- error = -EINVAL;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
-
- /* groups (mask and allow aces) */
-
- while (ace2type(ace) == ACL_GROUP) {
- if (*mask_ace == NULL)
- goto out;
-
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
+ pace++;
+ pace->e_tag = ACL_GROUP_OBJ;
+ error = check_deny(state->group.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+ add_to_mask(state, &state->group);
+
+ for (i=0; i < state->groups->n; i++) {
+ pace++;
+ pace->e_tag = ACL_GROUP;
+ error = check_deny(state->groups->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->groups->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->groups->aces[i].uid;
+ add_to_mask(state, &state->groups->aces[i].perms);
+ }
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- ac = kmalloc(sizeof(*ac), GFP_KERNEL);
- error = -ENOMEM;
- if (ac == NULL)
- goto out;
- error = -EINVAL;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
- !same_who(ace, *mask_ace))
- goto out;
+ pace++;
+ pace->e_tag = ACL_MASK;
+ low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
- ac->ace = ace;
- list_add_tail(&ac->ace_l, &group_l);
+ pace++;
+ pace->e_tag = ACL_OTHER;
+ error = check_deny(state->other.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
- error = write_pace(ace, pacl, pace, ACL_GROUP, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- }
+ return pacl;
+out_err:
+ posix_acl_release(pacl);
+ return ERR_PTR(error);
+}
- /* group owner (deny ace) */
+static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Allow all bits in the mask not already denied: */
+ astate->allow |= mask & ~astate->deny;
+}
- if (ace2type(ace) != ACL_GROUP_OBJ)
- goto out;
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- ace2 = ac->ace;
- if (!complementary_ace_pair(ace2, ace, flags))
- goto out;
- list_del(group_l.next);
- kfree(ac);
+static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Deny all bits in the mask not already allowed: */
+ astate->deny |= mask & ~astate->allow;
+}
- /* groups (deny aces) */
+static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
+{
+ int i;
- while (!list_empty(&group_l)) {
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_GROUP)
- goto out;
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- ace2 = ac->ace;
- if (!complementary_ace_pair(ace2, ace, flags))
- goto out;
- list_del(group_l.next);
- kfree(ac);
- }
+ for (i = 0; i < a->n; i++)
+ if (a->aces[i].uid == uid)
+ return i;
+ /* Not found: */
+ a->n++;
+ a->aces[i].uid = uid;
+ a->aces[i].perms.allow = state->everyone.allow;
+ a->aces[i].perms.deny = state->everyone.deny;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_OTHER)
- goto out;
- error = 0;
-out:
- while (!list_empty(&group_l)) {
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- list_del(group_l.next);
- kfree(ac);
- }
- return error;
+ return i;
}
-static inline int
-mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
{
- int error = -EINVAL;
- struct nfs4_ace *ace;
+ int i;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
- if (pacl->a_count != 3) {
- if (*mask_ace == NULL)
- goto out;
- (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
- write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
- }
- error = 0;
-out:
- return error;
+ for (i=0; i < a->n; i++)
+ deny_bits(&a->aces[i].perms, mask);
}
-static inline int
-other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static void allow_bits_array(struct posix_ace_state_array *a, u32 mask)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
+ int i;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_OTHER, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- error = 0;
-out:
- return error;
+ for (i=0; i < a->n; i++)
+ allow_bits(&a->aces[i].perms, mask);
}
-static int
-calculate_posix_ace_count(struct nfs4_acl *n4acl)
+static void process_one_v4_ace(struct posix_acl_state *state,
+ struct nfs4_ace *ace)
{
- if (n4acl->naces == 6) /* owner, owner group, and other only */
- return 3;
- else { /* Otherwise there must be a mask entry. */
- /* Also, the remaining entries are for named users and
- * groups, and come in threes (mask, allow, deny): */
- if (n4acl->naces < 7)
- return -EINVAL;
- if ((n4acl->naces - 7) % 3)
- return -EINVAL;
- return 4 + (n4acl->naces - 7)/3;
+ u32 mask = ace->access_mask;
+ int i;
+
+ switch (ace2type(ace)) {
+ case ACL_USER_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_USER:
+ i = find_uid(state, state->users, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->users->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->users->aces[i].perms, mask);
+ mask = state->users->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_GROUP_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->group, mask);
+ } else {
+ deny_bits(&state->group, mask);
+ mask = state->group.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_GROUP:
+ i = find_uid(state, state->groups, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->groups->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->groups->aces[i].perms, mask);
+ mask = state->groups->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_OTHER:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ allow_bits(&state->group, mask);
+ allow_bits(&state->other, mask);
+ allow_bits(&state->everyone, mask);
+ allow_bits_array(state->users, mask);
+ allow_bits_array(state->groups, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->other, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
}
}
-
static struct posix_acl *
_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
{
+ struct posix_acl_state state;
struct posix_acl *pacl;
- int error = -EINVAL, nace = 0;
- struct list_head *p;
- struct nfs4_ace *mask_ace = NULL;
- struct posix_acl_entry *pace;
-
- nace = calculate_posix_ace_count(n4acl);
- if (nace < 0)
- goto out_err;
-
- pacl = posix_acl_alloc(nace, GFP_KERNEL);
- error = -ENOMEM;
- if (pacl == NULL)
- goto out_err;
-
- pace = &pacl->a_entries[0];
- p = &n4acl->ace_head;
-
- error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
- if (error)
- goto out_acl;
-
- error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
- if (error)
- goto out_acl;
+ struct nfs4_ace *ace;
+ int ret;
- error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace,
- flags);
- if (error)
- goto out_acl;
+ ret = init_state(&state, n4acl->naces);
+ if (ret)
+ return ERR_PTR(ret);
- error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
- if (error)
- goto out_acl;
- error = other_from_v4(n4acl, &p, pacl, &pace, flags);
- if (error)
- goto out_acl;
+ list_for_each_entry(ace, &n4acl->ace_head, l_ace)
+ process_one_v4_ace(&state, ace);
- error = -EINVAL;
- if (p->next != &n4acl->ace_head)
- goto out_acl;
- if (pace != pacl->a_entries + pacl->a_count)
- goto out_acl;
+ pacl = posix_state_to_acl(&state, flags);
- sort_pacl(pacl);
+ free_state(&state);
- return pacl;
-out_acl:
- posix_acl_release(pacl);
-out_err:
- pacl = ERR_PTR(error);
+ if (!IS_ERR(pacl))
+ sort_pacl(pacl);
return pacl;
}
@@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
list_for_each_safe(h, n, &acl->ace_head) {
ace = list_entry(h, struct nfs4_ace, l_ace);
- if ((ace->flag & NFS4_INHERITANCE_FLAGS)
- != NFS4_INHERITANCE_FLAGS)
- continue;
+ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
+ ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+ return -EINVAL;
- error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
- ace->access_mask, ace->whotype, ace->who);
- if (error < 0)
- goto out;
+ if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
+ return -EINVAL;
- list_del(h);
- kfree(ace);
- acl->naces--;
+ switch (ace->flag & NFS4_INHERITANCE_FLAGS) {
+ case 0:
+ /* Leave this ace in the effective acl: */
+ continue;
+ case NFS4_INHERITANCE_FLAGS:
+ /* Add this ace to the default acl and remove it
+ * from the effective acl: */
+ error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+ ace->access_mask, ace->whotype, ace->who);
+ if (error)
+ return error;
+ list_del(h);
+ kfree(ace);
+ acl->naces--;
+ break;
+ case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE:
+ /* Add this ace to the default, but leave it in
+ * the effective acl as well: */
+ error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+ ace->access_mask, ace->whotype, ace->who);
+ if (error)
+ return error;
+ break;
+ default:
+ return -EINVAL;
+ }
}
-
-out:
- return error;
+ return 0;
}
static short
@@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p)
return -1;
}
-static inline int
-match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
-{
- switch (ace->whotype) {
- case NFS4_ACL_WHO_NAMED:
- return who == ace->who;
- case NFS4_ACL_WHO_OWNER:
- return who == owner;
- case NFS4_ACL_WHO_GROUP:
- return who == group;
- case NFS4_ACL_WHO_EVERYONE:
- return 1;
- default:
- return 0;
- }
-}
-
EXPORT_SYMBOL(nfs4_acl_new);
EXPORT_SYMBOL(nfs4_acl_free);
EXPORT_SYMBOL(nfs4_acl_add_ace);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8583d99ee740..f6ca9fb3fc63 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -131,7 +131,7 @@ xdr_error: \
#define READ_BUF(nbytes) do { \
p = xdr_inline_decode(xdr, nbytes); \
if (!p) { \
- dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
+ dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
__FUNCTION__, __LINE__); \
return -EIO; \
} \
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index bea6b9478114..b1902ebaab41 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -573,10 +573,9 @@ idmap_lookup(struct svc_rqst *rqstp,
struct idmap_defer_req *mdr;
int ret;
- mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
+ mdr = kzalloc(sizeof(*mdr), GFP_KERNEL);
if (!mdr)
return -ENOMEM;
- memset(mdr, 0, sizeof(*mdr));
atomic_set(&mdr->count, 1);
init_waitqueue_head(&mdr->waitq);
mdr->req.defer = idmap_defer;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ee4eff27aedc..8333db12caca 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -600,7 +600,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
&setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
nfs4_unlock_state();
if (status) {
- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!");
+ dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
}
}
@@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
*p++ = nfssvc_boot.tv_usec;
status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
- write->wr_vec, write->wr_vlen, write->wr_buflen,
+ rqstp->rq_vec, write->wr_vlen, write->wr_buflen,
&write->wr_how_written);
if (filp)
fput(filp);
@@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
* require a valid current filehandle
*/
- if ((!current_fh->fh_dentry) &&
- !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
- (op->opnum == OP_SETCLIENTID) ||
- (op->opnum == OP_SETCLIENTID_CONFIRM) ||
- (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
- (op->opnum == OP_RELEASE_LOCKOWNER))) {
- op->status = nfserr_nofilehandle;
+ if (!current_fh->fh_dentry) {
+ if (!((op->opnum == OP_PUTFH) ||
+ (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_SETCLIENTID_CONFIRM) ||
+ (op->opnum == OP_RENEW) ||
+ (op->opnum == OP_RESTOREFH) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER))) {
+ op->status = nfserr_nofilehandle;
+ goto encode_op;
+ }
+ }
+ /* Check must be done at start of each operation, except
+ * for GETATTR and ops not listed as returning NFS4ERR_MOVED
+ */
+ else if (current_fh->fh_export->ex_fslocs.migrated &&
+ !((op->opnum == OP_GETATTR) ||
+ (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_PUTPUBFH) ||
+ (op->opnum == OP_RENEW) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER))) {
+ op->status = nfserr_moved;
goto encode_op;
}
switch (op->opnum) {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e35d7e52fdeb..1cbd2e4ee122 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -184,7 +184,7 @@ struct dentry_list_arg {
static int
nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
- loff_t offset, ino_t ino, unsigned int d_type)
+ loff_t offset, u64 ino, unsigned int d_type)
{
struct dentry_list_arg *dla = arg;
struct list_head *dentries = &dla->dentries;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9daa0b9feb8d..ebcf226a9e4a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -339,8 +339,7 @@ alloc_client(struct xdr_netobj name)
{
struct nfs4_client *clp;
- if ((clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
- memset(clp, 0, sizeof(*clp));
+ if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) {
memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
@@ -1006,13 +1005,10 @@ alloc_init_file(struct inode *ino)
static void
nfsd4_free_slab(kmem_cache_t **slab)
{
- int status;
-
if (*slab == NULL)
return;
- status = kmem_cache_destroy(*slab);
+ kmem_cache_destroy(*slab);
*slab = NULL;
- WARN_ON(status);
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5446a0861d1d..41fc241b729a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -60,6 +60,14 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
+/*
+ * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing
+ * directory in order to indicate to the client that a filesystem boundary is present
+ * We use a fixed fsid for a referral
+ */
+#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL
+#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL
+
static int
check_filename(char *str, int len, int err)
{
@@ -198,8 +206,7 @@ static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
p = new;
memcpy(p, argp->tmp, nbytes);
} else {
- if (p != argp->tmpp)
- BUG();
+ BUG_ON(p != argp->tmpp);
argp->tmpp = NULL;
}
if (defer_free(argp, kfree, p)) {
@@ -927,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
goto xdr_error;
}
- write->wr_vec[0].iov_base = p;
- write->wr_vec[0].iov_len = avail;
+ argp->rqstp->rq_vec[0].iov_base = p;
+ argp->rqstp->rq_vec[0].iov_len = avail;
v = 0;
len = write->wr_buflen;
- while (len > write->wr_vec[v].iov_len) {
- len -= write->wr_vec[v].iov_len;
+ while (len > argp->rqstp->rq_vec[v].iov_len) {
+ len -= argp->rqstp->rq_vec[v].iov_len;
v++;
- write->wr_vec[v].iov_base = page_address(argp->pagelist[0]);
+ argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen >= PAGE_SIZE) {
- write->wr_vec[v].iov_len = PAGE_SIZE;
+ argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
argp->pagelen -= PAGE_SIZE;
} else {
- write->wr_vec[v].iov_len = argp->pagelen;
+ argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
argp->pagelen -= len;
}
}
- argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len);
- argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
- write->wr_vec[v].iov_len = len;
+ argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
+ argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+ argp->rqstp->rq_vec[v].iov_len = len;
write->wr_vlen = v+1;
DECODE_TAIL;
@@ -1224,6 +1231,119 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
stateowner->so_replay.rp_buflen); \
} } while (0);
+/* Encode as an array of strings the string given with components
+ * seperated @sep.
+ */
+static int nfsd4_encode_components(char sep, char *components,
+ u32 **pp, int *buflen)
+{
+ u32 *p = *pp;
+ u32 *countp = p;
+ int strlen, count=0;
+ char *str, *end;
+
+ dprintk("nfsd4_encode_components(%s)\n", components);
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(0); /* We will fill this in with @count later */
+ end = str = components;
+ while (*end) {
+ for (; *end && (*end != sep); end++)
+ ; /* Point to end of component */
+ strlen = end - str;
+ if (strlen) {
+ if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
+ return nfserr_resource;
+ WRITE32(strlen);
+ WRITEMEM(str, strlen);
+ count++;
+ }
+ else
+ end++;
+ str = end;
+ }
+ *pp = p;
+ p = countp;
+ WRITE32(count);
+ return 0;
+}
+
+/*
+ * encode a location element of a fs_locations structure
+ */
+static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
+ u32 **pp, int *buflen)
+{
+ int status;
+ u32 *p = *pp;
+
+ status = nfsd4_encode_components(':', location->hosts, &p, buflen);
+ if (status)
+ return status;
+ status = nfsd4_encode_components('/', location->path, &p, buflen);
+ if (status)
+ return status;
+ *pp = p;
+ return 0;
+}
+
+/*
+ * Return the path to an export point in the pseudo filesystem namespace
+ * Returned string is safe to use as long as the caller holds a reference
+ * to @exp.
+ */
+static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+ struct svc_fh tmp_fh;
+ char *path, *rootpath;
+ int stat;
+
+ fh_init(&tmp_fh, NFS4_FHSIZE);
+ stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
+ if (stat)
+ return ERR_PTR(stat);
+ rootpath = tmp_fh.fh_export->ex_path;
+
+ path = exp->ex_path;
+
+ if (strncmp(path, rootpath, strlen(rootpath))) {
+ printk("nfsd: fs_locations failed;"
+ "%s is not contained in %s\n", path, rootpath);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ return path + strlen(rootpath);
+}
+
+/*
+ * encode a fs_locations structure
+ */
+static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
+ struct svc_export *exp,
+ u32 **pp, int *buflen)
+{
+ int status, i;
+ u32 *p = *pp;
+ struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
+ char *root = nfsd4_path(rqstp, exp);
+
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ status = nfsd4_encode_components('/', root, &p, buflen);
+ if (status)
+ return status;
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(fslocs->locations_count);
+ for (i=0; i<fslocs->locations_count; i++) {
+ status = nfsd4_encode_fs_location4(&fslocs->locations[i],
+ &p, buflen);
+ if (status)
+ return status;
+ }
+ *pp = p;
+ return 0;
+}
static u32 nfs4_ftypes[16] = {
NF4BAD, NF4FIFO, NF4CHR, NF4BAD,
@@ -1273,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
}
+#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
+ FATTR4_WORD0_RDATTR_ERROR)
+#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
+
+static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
+{
+ /* As per referral draft: */
+ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
+ *bmval1 & ~WORD1_ABSENT_FS_ATTRS) {
+ if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR ||
+ *bmval0 & FATTR4_WORD0_FS_LOCATIONS)
+ *rdattr_err = NFSERR_MOVED;
+ else
+ return nfserr_moved;
+ }
+ *bmval0 &= WORD0_ABSENT_FS_ATTRS;
+ *bmval1 &= WORD1_ABSENT_FS_ATTRS;
+ return 0;
+}
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
@@ -1295,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
u32 *attrlenp;
u32 dummy;
u64 dummy64;
+ u32 rdattr_err = 0;
u32 *p = buffer;
int status;
int aclsupport = 0;
@@ -1304,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
+ if (exp->ex_fslocs.migrated) {
+ status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
+ if (status)
+ goto out;
+ }
+
status = vfs_getattr(exp->ex_mnt, dentry, &stat);
if (status)
goto out_nfserr;
@@ -1335,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_nfserr;
}
}
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ if (exp->ex_fslocs.locations == NULL) {
+ bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
+ }
+ }
if ((buflen -= 16) < 0)
goto out_resource;
@@ -1344,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
attrlenp = p++; /* to be backfilled later */
if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0;
if ((buflen -= 12) < 0)
goto out_resource;
+ if (!aclsupport)
+ word0 &= ~FATTR4_WORD0_ACL;
+ if (!exp->ex_fslocs.locations)
+ word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
WRITE32(2);
- WRITE32(aclsupport ?
- NFSD_SUPPORTED_ATTRS_WORD0 :
- NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
+ WRITE32(word0);
WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
}
if (bmval0 & FATTR4_WORD0_TYPE) {
@@ -1403,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_FSID) {
if ((buflen -= 16) < 0)
goto out_resource;
- if (is_fsid(fhp, rqstp->rq_reffh)) {
+ if (exp->ex_fslocs.migrated) {
+ WRITE64(NFS4_REFERRAL_FSID_MAJOR);
+ WRITE64(NFS4_REFERRAL_FSID_MINOR);
+ } else if (is_fsid(fhp, rqstp->rq_reffh)) {
WRITE64((u64)exp->ex_fsid);
WRITE64((u64)0);
} else {
@@ -1426,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(0);
+ WRITE32(rdattr_err);
}
if (bmval0 & FATTR4_WORD0_ACL) {
struct nfs4_ace *ace;
@@ -1514,6 +1671,13 @@ out_acl:
goto out_resource;
WRITE64((u64) statfs.f_files);
}
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
if ((buflen -= 4) < 0)
goto out_resource;
@@ -1537,12 +1701,12 @@ out_acl:
if (bmval0 & FATTR4_WORD0_MAXREAD) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64((u64) NFSSVC_MAXBLKSIZE);
+ WRITE64((u64) svc_max_payload(rqstp));
}
if (bmval0 & FATTR4_WORD0_MAXWRITE) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64((u64) NFSSVC_MAXBLKSIZE);
+ WRITE64((u64) svc_max_payload(rqstp));
}
if (bmval1 & FATTR4_WORD1_MODE) {
if ((buflen -= 4) < 0)
@@ -1846,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
resp->p, &buflen, getattr->ga_bmval,
resp->rqstp);
-
if (!nfserr)
resp->p += buflen;
return nfserr;
@@ -2040,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n
}
static int
-nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read)
+nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_read *read)
{
u32 eof;
int v, pn;
@@ -2055,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
RESERVE_SPACE(8); /* eof flag and byte count */
- maxcount = NFSSVC_MAXBLKSIZE;
+ maxcount = svc_max_payload(resp->rqstp);
if (maxcount > read->rd_length)
maxcount = read->rd_length;
len = maxcount;
v = 0;
while (len > 0) {
- pn = resp->rqstp->rq_resused;
- svc_take_page(resp->rqstp);
- read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]);
- read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE;
+ pn = resp->rqstp->rq_resused++;
+ resp->rqstp->rq_vec[v].iov_base =
+ page_address(resp->rqstp->rq_respages[pn]);
+ resp->rqstp->rq_vec[v].iov_len =
+ len < PAGE_SIZE ? len : PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
read->rd_vlen = v;
nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
- read->rd_offset, read->rd_iov, read->rd_vlen,
+ read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
&maxcount);
if (nfserr == nfserr_symlink)
nfserr = nfserr_inval;
if (nfserr)
return nfserr;
- eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size);
+ eof = (read->rd_offset + maxcount >=
+ read->rd_fhp->fh_dentry->d_inode->i_size);
WRITE32(eof);
WRITE32(maxcount);
@@ -2089,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
resp->xbuf->page_len = maxcount;
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
if (maxcount&3) {
@@ -2114,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
if (resp->xbuf->page_len)
return nfserr_resource;
- svc_take_page(resp->rqstp);
- page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
maxcount = PAGE_SIZE;
RESERVE_SPACE(4);
@@ -2139,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
resp->xbuf->page_len = maxcount;
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
if (maxcount&3) {
@@ -2190,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
goto err_no_verf;
}
- svc_take_page(resp->rqstp);
- page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
readdir->common.err = 0;
readdir->buflen = maxcount;
readdir->buffer = page;
@@ -2216,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
p = readdir->buffer;
*p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof);
- resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ resp->xbuf->page_len = ((char*)p) - (char*)page_address(
+ resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = tailbase;
resp->xbuf->tail[0].iov_len = 0;
resp->p = resp->xbuf->tail[0].iov_base;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7046ac9cf97f..39aed901514b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -23,10 +23,14 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/ctype.h>
#include <linux/nfs.h>
#include <linux/nfsd_idmap.h>
+#include <linux/lockd/bind.h>
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/cache.h>
#include <linux/nfsd/xdr.h>
@@ -35,8 +39,6 @@
#include <asm/uaccess.h>
-unsigned int nfsd_versbits = ~0;
-
/*
* We have a single directory with 9 nodes in it.
*/
@@ -52,7 +54,10 @@ enum {
NFSD_List,
NFSD_Fh,
NFSD_Threads,
+ NFSD_Pool_Threads,
NFSD_Versions,
+ NFSD_Ports,
+ NFSD_MaxBlkSize,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
* with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -75,7 +80,10 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size);
static ssize_t write_getfs(struct file *file, char *buf, size_t size);
static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
static ssize_t write_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
+static ssize_t write_ports(struct file *file, char *buf, size_t size);
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -91,7 +99,10 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Getfs] = write_getfs,
[NFSD_Fh] = write_filehandle,
[NFSD_Threads] = write_threads,
+ [NFSD_Pool_Threads] = write_pool_threads,
[NFSD_Versions] = write_versions,
+ [NFSD_Ports] = write_ports,
+ [NFSD_MaxBlkSize] = write_maxblksize,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_RecoveryDir] = write_recoverydir,
@@ -358,6 +369,72 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return strlen(buf);
}
+extern int nfsd_nrpools(void);
+extern int nfsd_get_nrthreads(int n, int *);
+extern int nfsd_set_nrthreads(int n, int *);
+
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
+{
+ /* if size > 0, look for an array of number of threads per node
+ * and apply them then write out number of threads per node as reply
+ */
+ char *mesg = buf;
+ int i;
+ int rv;
+ int len;
+ int npools = nfsd_nrpools();
+ int *nthreads;
+
+ if (npools == 0) {
+ /*
+ * NFS is shut down. The admin can start it by
+ * writing to the threads file but NOT the pool_threads
+ * file, sorry. Report zero threads.
+ */
+ strcpy(buf, "0\n");
+ return strlen(buf);
+ }
+
+ nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+ if (nthreads == NULL)
+ return -ENOMEM;
+
+ if (size > 0) {
+ for (i = 0; i < npools; i++) {
+ rv = get_int(&mesg, &nthreads[i]);
+ if (rv == -ENOENT)
+ break; /* fewer numbers than pools */
+ if (rv)
+ goto out_free; /* syntax error */
+ rv = -EINVAL;
+ if (nthreads[i] < 0)
+ goto out_free;
+ }
+ rv = nfsd_set_nrthreads(i, nthreads);
+ if (rv)
+ goto out_free;
+ }
+
+ rv = nfsd_get_nrthreads(npools, nthreads);
+ if (rv)
+ goto out_free;
+
+ mesg = buf;
+ size = SIMPLE_TRANSACTION_LIMIT;
+ for (i = 0; i < npools && size > 0; i++) {
+ snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' '));
+ len = strlen(mesg);
+ size -= len;
+ mesg += len;
+ }
+
+ return (mesg-buf);
+
+out_free:
+ kfree(nthreads);
+ return rv;
+}
+
static ssize_t write_versions(struct file *file, char *buf, size_t size)
{
/*
@@ -372,6 +449,10 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
if (size>0) {
if (nfsd_serv)
+ /* Cannot change versions without updating
+ * nfsd_serv->sv_xdrsize, and reallocing
+ * rq_argp and rq_resp
+ */
return -EBUSY;
if (buf[size-1] != '\n')
return -EINVAL;
@@ -390,10 +471,7 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
case 2:
case 3:
case 4:
- if (sign != '-')
- NFSCTL_VERSET(nfsd_versbits, num);
- else
- NFSCTL_VERUNSET(nfsd_versbits, num);
+ nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
break;
default:
return -EINVAL;
@@ -404,16 +482,15 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
/* If all get turned off, turn them back on, as
* having no versions is BAD
*/
- if ((nfsd_versbits & NFSCTL_VERALL)==0)
- nfsd_versbits = NFSCTL_VERALL;
+ nfsd_reset_versions();
}
/* Now write current state into reply buffer */
len = 0;
sep = "";
for (num=2 ; num <= 4 ; num++)
- if (NFSCTL_VERISSET(NFSCTL_VERALL, num)) {
+ if (nfsd_vers(num, NFSD_AVAIL)) {
len += sprintf(buf+len, "%s%c%d", sep,
- NFSCTL_VERISSET(nfsd_versbits, num)?'+':'-',
+ nfsd_vers(num, NFSD_TEST)?'+':'-',
num);
sep = " ";
}
@@ -421,6 +498,95 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+ if (size == 0) {
+ int len = 0;
+ lock_kernel();
+ if (nfsd_serv)
+ len = svc_sock_names(buf, nfsd_serv, NULL);
+ unlock_kernel();
+ return len;
+ }
+ /* Either a single 'fd' number is written, in which
+ * case it must be for a socket of a supported family/protocol,
+ * and we use it as an nfsd socket, or
+ * A '-' followed by the 'name' of a socket in which case
+ * we close the socket.
+ */
+ if (isdigit(buf[0])) {
+ char *mesg = buf;
+ int fd;
+ int err;
+ err = get_int(&mesg, &fd);
+ if (err)
+ return -EINVAL;
+ if (fd < 0)
+ return -EINVAL;
+ err = nfsd_create_serv();
+ if (!err) {
+ int proto = 0;
+ err = svc_addsock(nfsd_serv, fd, buf, &proto);
+ if (err >= 0) {
+ err = lockd_up(proto);
+ if (err < 0)
+ svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
+ }
+ /* Decrease the count, but don't shutdown the
+ * the service
+ */
+ lock_kernel();
+ nfsd_serv->sv_nrthreads--;
+ unlock_kernel();
+ }
+ return err < 0 ? err : 0;
+ }
+ if (buf[0] == '-') {
+ char *toclose = kstrdup(buf+1, GFP_KERNEL);
+ int len = 0;
+ if (!toclose)
+ return -ENOMEM;
+ lock_kernel();
+ if (nfsd_serv)
+ len = svc_sock_names(buf, nfsd_serv, toclose);
+ unlock_kernel();
+ if (len >= 0)
+ lockd_down();
+ kfree(toclose);
+ return len;
+ }
+ return -EINVAL;
+}
+
+int nfsd_max_blksize;
+
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ if (size > 0) {
+ int bsize;
+ int rv = get_int(&mesg, &bsize);
+ if (rv)
+ return rv;
+ /* force bsize into allowed range and
+ * required alignment.
+ */
+ if (bsize < 1024)
+ bsize = 1024;
+ if (bsize > NFSSVC_MAXBLKSIZE)
+ bsize = NFSSVC_MAXBLKSIZE;
+ bsize &= ~(1024-1);
+ lock_kernel();
+ if (nfsd_serv && nfsd_serv->sv_nrthreads) {
+ unlock_kernel();
+ return -EBUSY;
+ }
+ nfsd_max_blksize = bsize;
+ unlock_kernel();
+ }
+ return sprintf(buf, "%d\n", nfsd_max_blksize);
+}
+
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
@@ -483,7 +649,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 06cd0db0f32b..9ee1dab5d44a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
* status, 17 words for fattr, and 1 word for the byte count.
*/
- if (NFSSVC_MAXBLKSIZE < argp->count) {
+ if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
printk(KERN_NOTICE
"oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
ntohs(rqstp->rq_addr.sin_port),
argp->count);
- argp->count = NFSSVC_MAXBLKSIZE;
+ argp->count = NFSSVC_MAXBLKSIZE_V2;
}
svc_reserve(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr) return nfserr;
@@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
argp->len,
&stable);
return nfsd_return_attrs(nfserr, resp);
@@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfserr_exist;
if (isdotent(argp->name, argp->len))
goto done;
- fh_lock(dirfhp);
+ fh_lock_nested(dirfhp, I_MUTEX_PARENT);
dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
if (IS_ERR(dchild)) {
nfserr = nfserrno(PTR_ERR(dchild));
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
- PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
+ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4),
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ec1decf29bab..6fa6340a5fb8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -57,12 +57,6 @@ static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
static DEFINE_SPINLOCK(nfsd_call_lock);
-struct nfsd_list {
- struct list_head list;
- struct task_struct *task;
-};
-static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
-
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
@@ -117,6 +111,32 @@ struct svc_program nfsd_program = {
};
+int nfsd_vers(int vers, enum vers_op change)
+{
+ if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
+ return -1;
+ switch(change) {
+ case NFSD_SET:
+ nfsd_versions[vers] = nfsd_version[vers];
+ break;
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ if (vers < NFSD_ACL_NRVERS)
+ nfsd_acl_version[vers] = nfsd_acl_version[vers];
+#endif
+ case NFSD_CLEAR:
+ nfsd_versions[vers] = NULL;
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ if (vers < NFSD_ACL_NRVERS)
+ nfsd_acl_version[vers] = NULL;
+#endif
+ break;
+ case NFSD_TEST:
+ return nfsd_versions[vers] != NULL;
+ case NFSD_AVAIL:
+ return nfsd_version[vers] != NULL;
+ }
+ return 0;
+}
/*
* Maximum number of nfsd processes
*/
@@ -130,16 +150,192 @@ int nfsd_nrthreads(void)
return nfsd_serv->sv_nrthreads;
}
+static int killsig; /* signal that was used to kill last nfsd */
+static void nfsd_last_thread(struct svc_serv *serv)
+{
+ /* When last nfsd thread exits we need to do some clean-up */
+ struct svc_sock *svsk;
+ list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+ lockd_down();
+ nfsd_serv = NULL;
+ nfsd_racache_shutdown();
+ nfs4_state_shutdown();
+
+ printk(KERN_WARNING "nfsd: last server has exited\n");
+ if (killsig != SIG_NOCLEAN) {
+ printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+ nfsd_export_flush();
+ }
+}
+
+void nfsd_reset_versions(void)
+{
+ int found_one = 0;
+ int i;
+
+ for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
+ if (nfsd_program.pg_vers[i])
+ found_one = 1;
+ }
+
+ if (!found_one) {
+ for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
+ nfsd_program.pg_vers[i] = nfsd_version[i];
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
+ nfsd_acl_program.pg_vers[i] =
+ nfsd_acl_version[i];
+#endif
+ }
+}
+
+int nfsd_create_serv(void)
+{
+ int err = 0;
+ lock_kernel();
+ if (nfsd_serv) {
+ svc_get(nfsd_serv);
+ unlock_kernel();
+ return 0;
+ }
+ if (nfsd_max_blksize == 0) {
+ /* choose a suitable default */
+ struct sysinfo i;
+ si_meminfo(&i);
+ /* Aim for 1/4096 of memory per thread
+ * This gives 1MB on 4Gig machines
+ * But only uses 32K on 128M machines.
+ * Bottom out at 8K on 32M and smaller.
+ * Of course, this is only a default.
+ */
+ nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
+ i.totalram <<= PAGE_SHIFT - 12;
+ while (nfsd_max_blksize > i.totalram &&
+ nfsd_max_blksize >= 8*1024*2)
+ nfsd_max_blksize /= 2;
+ }
+
+ atomic_set(&nfsd_busy, 0);
+ nfsd_serv = svc_create_pooled(&nfsd_program,
+ NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize,
+ nfsd_last_thread,
+ nfsd, SIG_NOCLEAN, THIS_MODULE);
+ if (nfsd_serv == NULL)
+ err = -ENOMEM;
+ unlock_kernel();
+ do_gettimeofday(&nfssvc_boot); /* record boot time */
+ return err;
+}
+
+static int nfsd_init_socks(int port)
+{
+ int error;
+ if (!list_empty(&nfsd_serv->sv_permsocks))
+ return 0;
+
+ error = lockd_up(IPPROTO_UDP);
+ if (error >= 0) {
+ error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+ if (error < 0)
+ lockd_down();
+ }
+ if (error < 0)
+ return error;
+
+#ifdef CONFIG_NFSD_TCP
+ error = lockd_up(IPPROTO_TCP);
+ if (error >= 0) {
+ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+ if (error < 0)
+ lockd_down();
+ }
+ if (error < 0)
+ return error;
+#endif
+ return 0;
+}
+
+int nfsd_nrpools(void)
+{
+ if (nfsd_serv == NULL)
+ return 0;
+ else
+ return nfsd_serv->sv_nrpools;
+}
+
+int nfsd_get_nrthreads(int n, int *nthreads)
+{
+ int i = 0;
+
+ if (nfsd_serv != NULL) {
+ for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
+ nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
+ }
+
+ return 0;
+}
+
+int nfsd_set_nrthreads(int n, int *nthreads)
+{
+ int i = 0;
+ int tot = 0;
+ int err = 0;
+
+ if (nfsd_serv == NULL || n <= 0)
+ return 0;
+
+ if (n > nfsd_serv->sv_nrpools)
+ n = nfsd_serv->sv_nrpools;
+
+ /* enforce a global maximum number of threads */
+ tot = 0;
+ for (i = 0; i < n; i++) {
+ if (nthreads[i] > NFSD_MAXSERVS)
+ nthreads[i] = NFSD_MAXSERVS;
+ tot += nthreads[i];
+ }
+ if (tot > NFSD_MAXSERVS) {
+ /* total too large: scale down requested numbers */
+ for (i = 0; i < n && tot > 0; i++) {
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ tot -= (nthreads[i] - new);
+ nthreads[i] = new;
+ }
+ for (i = 0; i < n && tot > 0; i++) {
+ nthreads[i]--;
+ tot--;
+ }
+ }
+
+ /*
+ * There must always be a thread in pool 0; the admin
+ * can't shut down NFS completely using pool_threads.
+ */
+ if (nthreads[0] == 0)
+ nthreads[0] = 1;
+
+ /* apply the new numbers */
+ lock_kernel();
+ svc_get(nfsd_serv);
+ for (i = 0; i < n; i++) {
+ err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
+ nthreads[i]);
+ if (err)
+ break;
+ }
+ svc_destroy(nfsd_serv);
+ unlock_kernel();
+
+ return err;
+}
+
int
nfsd_svc(unsigned short port, int nrservs)
{
int error;
- int none_left, found_one, i;
- struct list_head *victim;
lock_kernel();
- dprintk("nfsd: creating service: vers 0x%x\n",
- nfsd_versbits);
+ dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
nrservs = 0;
@@ -153,91 +349,20 @@ nfsd_svc(unsigned short port, int nrservs)
error = nfs4_state_start();
if (error<0)
goto out;
- if (!nfsd_serv) {
- /*
- * Use the nfsd_ctlbits to define which
- * versions that will be advertised.
- * If nfsd_ctlbits doesn't list any version,
- * export them all.
- */
- found_one = 0;
-
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
- if (NFSCTL_VERISSET(nfsd_versbits, i)) {
- nfsd_program.pg_vers[i] = nfsd_version[i];
- found_one = 1;
- } else
- nfsd_program.pg_vers[i] = NULL;
- }
- if (!found_one) {
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
- nfsd_program.pg_vers[i] = nfsd_version[i];
- }
+ nfsd_reset_versions();
+ error = nfsd_create_serv();
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- found_one = 0;
-
- for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
- if (NFSCTL_VERISSET(nfsd_versbits, i)) {
- nfsd_acl_program.pg_vers[i] =
- nfsd_acl_version[i];
- found_one = 1;
- } else
- nfsd_acl_program.pg_vers[i] = NULL;
- }
-
- if (!found_one) {
- for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
- nfsd_acl_program.pg_vers[i] =
- nfsd_acl_version[i];
- }
-#endif
-
- atomic_set(&nfsd_busy, 0);
- error = -ENOMEM;
- nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
- if (nfsd_serv == NULL)
- goto out;
- error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
- if (error < 0)
- goto failure;
+ if (error)
+ goto out;
+ error = nfsd_init_socks(port);
+ if (error)
+ goto failure;
-#ifdef CONFIG_NFSD_TCP
- error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
- if (error < 0)
- goto failure;
-#endif
- do_gettimeofday(&nfssvc_boot); /* record boot time */
- } else
- nfsd_serv->sv_nrthreads++;
- nrservs -= (nfsd_serv->sv_nrthreads-1);
- while (nrservs > 0) {
- nrservs--;
- __module_get(THIS_MODULE);
- error = svc_create_thread(nfsd, nfsd_serv);
- if (error < 0) {
- module_put(THIS_MODULE);
- break;
- }
- }
- victim = nfsd_list.next;
- while (nrservs < 0 && victim != &nfsd_list) {
- struct nfsd_list *nl =
- list_entry(victim,struct nfsd_list, list);
- victim = victim->next;
- send_sig(SIG_NOCLEAN, nl->task, 1);
- nrservs++;
- }
+ error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
failure:
- none_left = (nfsd_serv->sv_nrthreads == 1);
svc_destroy(nfsd_serv); /* Release server */
- if (none_left) {
- nfsd_serv = NULL;
- nfsd_racache_shutdown();
- nfs4_state_shutdown();
- }
out:
unlock_kernel();
return error;
@@ -270,10 +395,8 @@ update_thread_usage(int busy_threads)
static void
nfsd(struct svc_rqst *rqstp)
{
- struct svc_serv *serv = rqstp->rq_server;
struct fs_struct *fsp;
int err;
- struct nfsd_list me;
sigset_t shutdown_mask, allowed_mask;
/* Lock module and set up kernel thread */
@@ -297,10 +420,7 @@ nfsd(struct svc_rqst *rqstp)
nfsdstats.th_cnt++;
- lockd_up(); /* start lockd */
-
- me.task = current;
- list_add(&me.list, &nfsd_list);
+ rqstp->rq_task = current;
unlock_kernel();
@@ -322,8 +442,7 @@ nfsd(struct svc_rqst *rqstp)
* Find a socket with data available and call its
* recvfrom routine.
*/
- while ((err = svc_recv(serv, rqstp,
- 60*60*HZ)) == -EAGAIN)
+ while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
if (err < 0)
break;
@@ -336,7 +455,7 @@ nfsd(struct svc_rqst *rqstp)
/* Process request with signals blocked. */
sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
- svc_process(serv, rqstp);
+ svc_process(rqstp);
/* Unlock export hash tables */
exp_readunlock();
@@ -353,29 +472,13 @@ nfsd(struct svc_rqst *rqstp)
if (sigismember(&current->pending.signal, signo) &&
!sigismember(&current->blocked, signo))
break;
- err = signo;
+ killsig = signo;
}
- /* Clear signals before calling lockd_down() and svc_exit_thread() */
+ /* Clear signals before calling svc_exit_thread() */
flush_signals(current);
lock_kernel();
- /* Release lockd */
- lockd_down();
-
- /* Check if this is last thread */
- if (serv->sv_nrthreads==1) {
-
- printk(KERN_WARNING "nfsd: last server has exited\n");
- if (err != SIG_NOCLEAN) {
- printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
- nfsd_export_flush();
- }
- nfsd_serv = NULL;
- nfsd_racache_shutdown(); /* release read-ahead cache */
- nfs4_state_shutdown();
- }
- list_del(&me.list);
nfsdstats.th_cnt --;
out:
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index e3a0797dd56b..1135c0d14557 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,5 +1,5 @@
/*
- * linux/fs/nfsd/xdr.c
+ * linux/fs/nfsd/nfsxdr.c
*
* XDR support for nfsd
*
@@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > NFSSVC_MAXBLKSIZE_V2)
+ len = NFSSVC_MAXBLKSIZE_V2;
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
*/
v=0;
while (len > 0) {
- pn=rqstp->rq_resused;
- svc_take_page(rqstp);
- args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
- args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
- len -= args->vec[v].iov_len;
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
v++;
}
args->vlen = v;
@@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */
len = args->len = ntohl(*p++);
- args->vec[0].iov_base = (void*)p;
- args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > NFSSVC_MAXBLKSIZE_V2)
+ len = NFSSVC_MAXBLKSIZE_V2;
v = 0;
- while (len > args->vec[v].iov_len) {
- len -= args->vec[v].iov_len;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
v++;
- args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
- args->vec[v].iov_len = PAGE_SIZE;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
- args->vec[v].iov_len = len;
+ rqstp->rq_vec[v].iov_len = len;
args->vlen = v+1;
- return args->vec[0].iov_len > 0;
+ return rqstp->rq_vec[0].iov_len > 0;
}
int
@@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka
{
if (!(p = decode_fh(p, &args->fh)))
return 0;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->len;
if (resp->len & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
@@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
{
struct kstatfs *stat = &resp->stats;
- *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */
+ *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */
*p++ = htonl(stat->f_bsize);
*p++ = htonl(stat->f_blocks);
*p++ = htonl(stat->f_bfree);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c9e3b5a8fe07..1141bd29e4e3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
#include <linux/nfsd_idmap.h>
#include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */
+#include <linux/jhash.h>
#include <asm/uaccess.h>
@@ -81,10 +82,19 @@ struct raparms {
dev_t p_dev;
int p_set;
struct file_ra_state p_ra;
+ unsigned int p_hindex;
};
+struct raparm_hbucket {
+ struct raparms *pb_head;
+ spinlock_t pb_lock;
+} ____cacheline_aligned_in_smp;
+
static struct raparms * raparml;
-static struct raparms * raparm_cache;
+#define RAPARM_HASH_BITS 4
+#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
+#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
+static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
} else if (error < 0)
goto out_nfserr;
- if (pacl) {
- error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
- if (error < 0)
- goto out_nfserr;
- }
+ error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+ if (error < 0)
+ goto out_nfserr;
- if (dpacl) {
+ if (S_ISDIR(inode->i_mode)) {
error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
if (error < 0)
goto out_nfserr;
@@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp)
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
-static DEFINE_SPINLOCK(ra_lock);
static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
{
struct raparms *ra, **rap, **frap = NULL;
int depth = 0;
+ unsigned int hash;
+ struct raparm_hbucket *rab;
- spin_lock(&ra_lock);
- for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+ hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
+ rab = &raparm_hash[hash];
+
+ spin_lock(&rab->pb_lock);
+ for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
if (ra->p_ino == ino && ra->p_dev == dev)
goto found;
depth++;
@@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
}
depth = nfsdstats.ra_size*11/10;
if (!frap) {
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
return NULL;
}
rap = frap;
@@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
ra->p_dev = dev;
ra->p_ino = ino;
ra->p_set = 0;
+ ra->p_hindex = hash;
found:
- if (rap != &raparm_cache) {
+ if (rap != &rab->pb_head) {
*rap = ra->p_next;
- ra->p_next = raparm_cache;
- raparm_cache = ra;
+ ra->p_next = rab->pb_head;
+ rab->pb_head = ra;
}
ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
return ra;
}
@@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
{
unsigned long count = desc->count;
struct svc_rqst *rqstp = desc->arg.data;
+ struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
if (size > count)
size = count;
if (rqstp->rq_res.page_len == 0) {
get_page(page);
- rqstp->rq_respages[rqstp->rq_resused++] = page;
+ put_page(*pp);
+ *pp = page;
+ rqstp->rq_resused++;
rqstp->rq_res.page_base = offset;
rqstp->rq_res.page_len = size;
- } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) {
+ } else if (page != pp[-1]) {
get_page(page);
- rqstp->rq_respages[rqstp->rq_resused++] = page;
+ put_page(*pp);
+ *pp = page;
+ rqstp->rq_resused++;
rqstp->rq_res.page_len += size;
- } else {
+ } else
rqstp->rq_res.page_len += size;
- }
desc->count = count - size;
desc->written += size;
@@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
file->f_ra = ra->p_ra;
if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
- svc_pushback_unused_pages(rqstp);
+ rqstp->rq_resused = 1;
err = file->f_op->sendfile(file, &offset, *count,
nfsd_read_actor, rqstp);
} else {
@@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write back readahead params */
if (ra) {
- spin_lock(&ra_lock);
+ struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+ spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra;
ra->p_set = 1;
ra->p_count--;
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
}
if (err >= 0) {
@@ -1114,7 +1132,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
if (!resfhp->fh_dentry) {
/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
- fh_lock(fhp);
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
dchild = lookup_one_len(fname, dentry, flen);
err = PTR_ERR(dchild);
if (IS_ERR(dchild))
@@ -1240,7 +1258,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notdir;
if(!dirp->i_op || !dirp->i_op->lookup)
goto out;
- fh_lock(fhp);
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
/*
* Compose the response file handle.
@@ -1494,7 +1512,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (isdotent(name, len))
goto out;
- fh_lock(ffhp);
+ fh_lock_nested(ffhp, I_MUTEX_PARENT);
ddir = ffhp->fh_dentry;
dirp = ddir->d_inode;
@@ -1644,7 +1662,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (err)
goto out;
- fh_lock(fhp);
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
dentry = fhp->fh_dentry;
dirp = dentry->d_inode;
@@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
void
nfsd_racache_shutdown(void)
{
- if (!raparm_cache)
+ if (!raparml)
return;
dprintk("nfsd: freeing readahead buffers.\n");
kfree(raparml);
- raparm_cache = raparml = NULL;
+ raparml = NULL;
}
/*
* Initialize readahead param cache
@@ -1842,19 +1860,31 @@ int
nfsd_racache_init(int cache_size)
{
int i;
+ int j = 0;
+ int nperbucket;
+
- if (raparm_cache)
+ if (raparml)
return 0;
+ if (cache_size < 2*RAPARM_HASH_SIZE)
+ cache_size = 2*RAPARM_HASH_SIZE;
raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
if (raparml != NULL) {
dprintk("nfsd: allocating %d readahead buffers.\n",
cache_size);
+ for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
+ raparm_hash[i].pb_head = NULL;
+ spin_lock_init(&raparm_hash[i].pb_lock);
+ }
+ nperbucket = cache_size >> RAPARM_HASH_BITS;
memset(raparml, 0, sizeof(struct raparms) * cache_size);
for (i = 0; i < cache_size - 1; i++) {
- raparml[i].p_next = raparml + i + 1;
+ if (i % nperbucket == 0)
+ raparm_hash[j++].pb_head = raparml + i;
+ if (i % nperbucket < nperbucket-1)
+ raparml[i].p_next = raparml + i + 1;
}
- raparm_cache = raparml;
} else {
printk(KERN_WARNING
"nfsd: Could not allocate memory read-ahead cache.\n");