summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/binfmt_aout.c1
-rw-r--r--fs/binfmt_elf.c25
-rw-r--r--fs/binfmt_elf_fdpic.c1
-rw-r--r--fs/binfmt_flat.c1
-rw-r--r--fs/buffer.c15
-rw-r--r--fs/ceph/inode.c11
-rw-r--r--fs/ceph/mds_client.c7
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c19
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c202
-rw-r--r--fs/coda/inode.c1
-rw-r--r--fs/coda/psdev.c1
-rw-r--r--fs/coda/upcall.c1
-rw-r--r--fs/compat.c26
-rw-r--r--fs/dcache.c3
-rw-r--r--fs/eventpoll.c1
-rw-r--r--fs/exec.c9
-rw-r--r--fs/exofs/super.c7
-rw-r--r--fs/ext3/balloc.c84
-rw-r--r--fs/ext3/inode.c9
-rw-r--r--fs/ext4/balloc.c63
-rw-r--r--fs/ext4/dir.c227
-rw-r--r--fs/ext4/ext4.h40
-rw-r--r--fs/ext4/ext4_extents.h4
-rw-r--r--fs/ext4/ext4_jbd2.h128
-rw-r--r--fs/ext4/extents.c330
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/hash.c4
-rw-r--r--fs/ext4/ialloc.c260
-rw-r--r--fs/ext4/inode.c95
-rw-r--r--fs/ext4/mballoc.c342
-rw-r--r--fs/ext4/mballoc.h20
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/mmp.c4
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/page-io.c11
-rw-r--r--fs/ext4/resize.c37
-rw-r--r--fs/ext4/super.c1075
-rw-r--r--fs/ext4/xattr.c25
-rw-r--r--fs/fcntl.c18
-rw-r--r--fs/file.c52
-rw-r--r--fs/fs-writeback.c22
-rw-r--r--fs/jbd2/checkpoint.c140
-rw-r--r--fs/jbd2/commit.c48
-rw-r--r--fs/jbd2/journal.c362
-rw-r--r--fs/jbd2/recovery.c5
-rw-r--r--fs/jbd2/revoke.c12
-rw-r--r--fs/jbd2/transaction.c48
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/ncpfs/file.c1
-rw-r--r--fs/ncpfs/inode.c1
-rw-r--r--fs/ncpfs/mmap.c1
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/direct.c1
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/getroot.c1
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/nfs4filelayout.c1
-rw-r--r--fs/nfs/nfs4proc.c43
-rw-r--r--fs/nfs/read.c1
-rw-r--r--fs/nfs/super.c1
-rw-r--r--fs/nfsd/current_stateid.h28
-rw-r--r--fs/nfsd/export.c2
-rw-r--r--fs/nfsd/netns.h34
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c53
-rw-r--r--fs/nfsd/nfs4proc.c118
-rw-r--r--fs/nfsd/nfs4recover.c647
-rw-r--r--fs/nfsd/nfs4state.c365
-rw-r--r--fs/nfsd/nfs4xdr.c132
-rw-r--r--fs/nfsd/nfsctl.c22
-rw-r--r--fs/nfsd/nfsd.h7
-rw-r--r--fs/nfsd/nfssvc.c44
-rw-r--r--fs/nfsd/state.h47
-rw-r--r--fs/nfsd/vfs.c33
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--fs/nfsd/xdr4.h34
-rw-r--r--fs/open.c4
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/namespaces.c6
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/quota/dquot.c189
-rw-r--r--fs/reiserfs/journal.c1
-rw-r--r--fs/select.c2
-rw-r--r--fs/squashfs/block.c3
-rw-r--r--fs/squashfs/dir.c7
-rw-r--r--fs/squashfs/namei.c5
-rw-r--r--fs/squashfs/squashfs_fs.h19
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/udf/balloc.c84
-rw-r--r--fs/udf/ialloc.c1
-rw-r--r--fs/udf/inode.c20
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/udf/udf_i.h1
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/ufs/super.c1
-rw-r--r--fs/xfs/xfs_alloc.c36
-rw-r--r--fs/xfs/xfs_alloc.h12
-rw-r--r--fs/xfs/xfs_attr.c16
-rw-r--r--fs/xfs/xfs_attr_leaf.c40
-rw-r--r--fs/xfs/xfs_bmap.c9
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_da_btree.c32
-rw-r--r--fs/xfs/xfs_discard.c61
-rw-r--r--fs/xfs/xfs_dquot.c2
-rw-r--r--fs/xfs/xfs_iget.c8
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_ioctl.c14
-rw-r--r--fs/xfs/xfs_itable.c3
-rw-r--r--fs/xfs/xfs_log.c3
-rw-r--r--fs/xfs/xfs_log_recover.c33
-rw-r--r--fs/xfs/xfs_rtalloc.c9
-rw-r--r--fs/xfs/xfs_super.c33
-rw-r--r--fs/xfs/xfs_trace.h78
119 files changed, 3822 insertions, 2392 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 10b7d3c9dba8..8c92a9ba8330 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -259,7 +259,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (v9fs_proto_dotl(v9ses)) {
res = p9_client_statfs(fid, &rs);
if (res == 0) {
- buf->f_type = V9FS_MAGIC;
+ buf->f_type = rs.type;
buf->f_bsize = rs.bsize;
buf->f_blocks = rs.blocks;
buf->f_bfree = rs.bfree;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 85f1fcdb30e7..9dacb8586701 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -230,7 +230,7 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
fdt = files_fdtable(files);
BUG_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
- FD_SET(fd, fdt->close_on_exec);
+ __set_close_on_exec(fd, fdt);
spin_unlock(&files->file_lock);
}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 4d5e6d26578c..2eb12f13593d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -26,7 +26,6 @@
#include <linux/coredump.h>
#include <linux/slab.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/a.out-core.h>
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 504b6eee50a9..48ffb3dc610a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
+#include <asm/exec.h>
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
@@ -1414,6 +1415,22 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
+#ifndef PR_REG_SIZE
+#define PR_REG_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PRSTATUS_SIZE
+#define PRSTATUS_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PR_REG_PTR
+#define PR_REG_PTR(S) (&((S)->pr_reg))
+#endif
+
+#ifndef SET_PR_FPVALID
+#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#endif
+
static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset_view *view,
long signr, size_t *total)
@@ -1428,11 +1445,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
*/
fill_prstatus(&t->prstatus, t->task, signr);
(void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, sizeof(t->prstatus.pr_reg),
- &t->prstatus.pr_reg, NULL);
+ 0, PR_REG_SIZE(t->prstatus.pr_reg),
+ PR_REG_PTR(&t->prstatus), NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- sizeof(t->prstatus), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1462,7 +1479,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
regset->core_note_type,
size, data);
else {
- t->prstatus.pr_fpvalid = 1;
+ SET_PR_FPVALID(&t->prstatus, 1);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c64bf5ee2df4..9bd5612a8224 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -39,6 +39,7 @@
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/pgalloc.h>
+#include <asm/exec.h>
typedef char *elf_caddr_t;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5979027451b3..024d20ee3ca3 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -37,7 +37,6 @@
#include <linux/syscalls.h>
#include <asm/byteorder.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
diff --git a/fs/buffer.c b/fs/buffer.c
index 70e2017edd70..36d66653b931 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1384,10 +1384,23 @@ static void invalidate_bh_lru(void *arg)
}
put_cpu_var(bh_lrus);
}
+
+static bool has_bh_in_lru(int cpu, void *dummy)
+{
+ struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
+ int i;
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ if (b->bhs[i])
+ return 1;
+ }
+
+ return 0;
+}
+
void invalidate_bh_lrus(void)
{
- on_each_cpu(invalidate_bh_lru, NULL, 1);
+ on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2c489378b4cd..9fff9f3b17e4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -677,18 +677,19 @@ static int fill_inode(struct inode *inode,
case S_IFLNK:
inode->i_op = &ceph_symlink_iops;
if (!ci->i_symlink) {
- int symlen = iinfo->symlink_len;
+ u32 symlen = iinfo->symlink_len;
char *sym;
- BUG_ON(symlen != inode->i_size);
spin_unlock(&ci->i_ceph_lock);
+ err = -EINVAL;
+ if (WARN_ON(symlen != inode->i_size))
+ goto out;
+
err = -ENOMEM;
- sym = kmalloc(symlen+1, GFP_NOFS);
+ sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
if (!sym)
goto out;
- memcpy(sym, iinfo->symlink, symlen);
- sym[symlen] = 0;
spin_lock(&ci->i_ceph_lock);
if (!ci->i_symlink)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 866e8d7ca37d..89971e137aab 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -402,7 +402,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
spin_lock_init(&s->s_gen_ttl_lock);
s->s_cap_gen = 0;
- s->s_cap_ttl = 0;
+ s->s_cap_ttl = jiffies - 1;
spin_lock_init(&s->s_cap_lock);
s->s_renew_requested = 0;
@@ -1083,8 +1083,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc,
int wake = 0;
spin_lock(&session->s_cap_lock);
- was_stale = is_renew && (session->s_cap_ttl == 0 ||
- time_after_eq(jiffies, session->s_cap_ttl));
+ was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
session->s_cap_ttl = session->s_renew_requested +
mdsc->mdsmap->m_session_timeout*HZ;
@@ -2332,7 +2331,7 @@ static void handle_session(struct ceph_mds_session *session,
session->s_mds);
spin_lock(&session->s_gen_ttl_lock);
session->s_cap_gen++;
- session->s_cap_ttl = 0;
+ session->s_cap_ttl = jiffies - 1;
spin_unlock(&session->s_gen_ttl_lock);
send_renew_caps(mdsc, session);
break;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index a559c80f127a..f04c0961f993 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -331,7 +331,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
/* alloc new snap context */
err = -ENOMEM;
- if (num > ULONG_MAX / sizeof(u64) - sizeof(*snapc))
+ if (num > (ULONG_MAX - sizeof(*snapc)) / sizeof(u64))
goto fail;
snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS);
if (!snapc)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 256f85221926..1e67dd7305a4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -130,10 +130,12 @@ enum {
Opt_nodirstat,
Opt_rbytes,
Opt_norbytes,
+ Opt_asyncreaddir,
Opt_noasyncreaddir,
Opt_dcache,
Opt_nodcache,
Opt_ino32,
+ Opt_noino32,
};
static match_table_t fsopt_tokens = {
@@ -153,10 +155,12 @@ static match_table_t fsopt_tokens = {
{Opt_nodirstat, "nodirstat"},
{Opt_rbytes, "rbytes"},
{Opt_norbytes, "norbytes"},
+ {Opt_asyncreaddir, "asyncreaddir"},
{Opt_noasyncreaddir, "noasyncreaddir"},
{Opt_dcache, "dcache"},
{Opt_nodcache, "nodcache"},
{Opt_ino32, "ino32"},
+ {Opt_noino32, "noino32"},
{-1, NULL}
};
@@ -232,6 +236,9 @@ static int parse_fsopt_token(char *c, void *private)
case Opt_norbytes:
fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
break;
+ case Opt_asyncreaddir:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
+ break;
case Opt_noasyncreaddir:
fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
break;
@@ -244,6 +251,9 @@ static int parse_fsopt_token(char *c, void *private)
case Opt_ino32:
fsopt->flags |= CEPH_MOUNT_OPT_INO32;
break;
+ case Opt_noino32:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
+ break;
default:
BUG_ON(token);
}
@@ -334,10 +344,12 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
*path += 2;
dout("server path '%s'\n", *path);
- err = ceph_parse_options(popt, options, dev_name, dev_name_end,
+ *popt = ceph_parse_options(options, dev_name, dev_name_end,
parse_fsopt_token, (void *)fsopt);
- if (err)
+ if (IS_ERR(*popt)) {
+ err = PTR_ERR(*popt);
goto out;
+ }
/* success */
*pfsopt = fsopt;
@@ -926,6 +938,7 @@ static int __init init_ceph(void)
if (ret)
goto out;
+ ceph_xattr_init();
ret = register_filesystem(&ceph_fs_type);
if (ret)
goto out_icache;
@@ -935,6 +948,7 @@ static int __init init_ceph(void)
return 0;
out_icache:
+ ceph_xattr_exit();
destroy_caches();
out:
return ret;
@@ -944,6 +958,7 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
+ ceph_xattr_exit();
destroy_caches();
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1421f3d875a2..fc35036d258d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -367,7 +367,7 @@ static inline u32 ceph_ino_to_ino32(__u64 vino)
u32 ino = vino & 0xffffffff;
ino ^= vino >> 32;
if (!ino)
- ino = 1;
+ ino = 2;
return ino;
}
@@ -733,6 +733,8 @@ extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
extern int ceph_removexattr(struct dentry *, const char *);
extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
+extern void __init ceph_xattr_init(void);
+extern void ceph_xattr_exit(void);
/* caps.c */
extern const char *ceph_cap_string(int c);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a76f697303d9..35b86331d8a5 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -8,9 +8,12 @@
#include <linux/xattr.h>
#include <linux/slab.h>
+#define XATTR_CEPH_PREFIX "ceph."
+#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
+
static bool ceph_is_valid_xattr(const char *name)
{
- return !strncmp(name, "ceph.", 5) ||
+ return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
!strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN) ||
!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
@@ -21,79 +24,91 @@ static bool ceph_is_valid_xattr(const char *name)
* These define virtual xattrs exposing the recursive directory
* statistics and layout metadata.
*/
-struct ceph_vxattr_cb {
- bool readonly;
+struct ceph_vxattr {
char *name;
+ size_t name_size; /* strlen(name) + 1 (for '\0') */
size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
size_t size);
+ bool readonly;
};
/* directories */
-static size_t ceph_vxattrcb_entries(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
}
-static size_t ceph_vxattrcb_files(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_files);
}
-static size_t ceph_vxattrcb_subdirs(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_subdirs);
}
-static size_t ceph_vxattrcb_rentries(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
}
-static size_t ceph_vxattrcb_rfiles(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_rfiles);
}
-static size_t ceph_vxattrcb_rsubdirs(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_rsubdirs);
}
-static size_t ceph_vxattrcb_rbytes(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
size_t size)
{
return snprintf(val, size, "%lld", ci->i_rbytes);
}
-static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
size_t size)
{
- return snprintf(val, size, "%ld.%ld", (long)ci->i_rctime.tv_sec,
+ return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
(long)ci->i_rctime.tv_nsec);
}
-static struct ceph_vxattr_cb ceph_dir_vxattrs[] = {
- { true, "ceph.dir.entries", ceph_vxattrcb_entries},
- { true, "ceph.dir.files", ceph_vxattrcb_files},
- { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs},
- { true, "ceph.dir.rentries", ceph_vxattrcb_rentries},
- { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles},
- { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs},
- { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes},
- { true, "ceph.dir.rctime", ceph_vxattrcb_rctime},
- { true, NULL, NULL }
+#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
+
+#define XATTR_NAME_CEPH(_type, _name) \
+ { \
+ .name = CEPH_XATTR_NAME(_type, _name), \
+ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
+ .readonly = true, \
+ }
+
+static struct ceph_vxattr ceph_dir_vxattrs[] = {
+ XATTR_NAME_CEPH(dir, entries),
+ XATTR_NAME_CEPH(dir, files),
+ XATTR_NAME_CEPH(dir, subdirs),
+ XATTR_NAME_CEPH(dir, rentries),
+ XATTR_NAME_CEPH(dir, rfiles),
+ XATTR_NAME_CEPH(dir, rsubdirs),
+ XATTR_NAME_CEPH(dir, rbytes),
+ XATTR_NAME_CEPH(dir, rctime),
+ { 0 } /* Required table terminator */
};
+static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
/* files */
-static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
+static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
size_t size)
{
int ret;
@@ -103,21 +118,32 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
(unsigned long long)ceph_file_layout_su(ci->i_layout),
(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
(unsigned long long)ceph_file_layout_object_size(ci->i_layout));
- if (ceph_file_layout_pg_preferred(ci->i_layout))
- ret += snprintf(val + ret, size, "preferred_osd=%lld\n",
+
+ if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
+ val += ret;
+ size -= ret;
+ ret += snprintf(val, size, "preferred_osd=%lld\n",
(unsigned long long)ceph_file_layout_pg_preferred(
ci->i_layout));
+ }
+
return ret;
}
-static struct ceph_vxattr_cb ceph_file_vxattrs[] = {
- { true, "ceph.file.layout", ceph_vxattrcb_layout},
+static struct ceph_vxattr ceph_file_vxattrs[] = {
+ XATTR_NAME_CEPH(file, layout),
/* The following extended attribute name is deprecated */
- { true, "ceph.layout", ceph_vxattrcb_layout},
- { true, NULL, NULL }
+ {
+ .name = XATTR_CEPH_PREFIX "layout",
+ .name_size = sizeof (XATTR_CEPH_PREFIX "layout"),
+ .getxattr_cb = ceph_vxattrcb_file_layout,
+ .readonly = true,
+ },
+ { 0 } /* Required table terminator */
};
+static size_t ceph_file_vxattrs_name_size; /* total size of all names */
-static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
+static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
{
if (S_ISDIR(inode->i_mode))
return ceph_dir_vxattrs;
@@ -126,14 +152,59 @@ static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
return NULL;
}
-static struct ceph_vxattr_cb *ceph_match_vxattr(struct ceph_vxattr_cb *vxattr,
+static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
+{
+ if (vxattrs == ceph_dir_vxattrs)
+ return ceph_dir_vxattrs_name_size;
+ if (vxattrs == ceph_file_vxattrs)
+ return ceph_file_vxattrs_name_size;
+ BUG();
+
+ return 0;
+}
+
+/*
+ * Compute the aggregate size (including terminating '\0') of all
+ * virtual extended attribute names in the given vxattr table.
+ */
+static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
+{
+ struct ceph_vxattr *vxattr;
+ size_t size = 0;
+
+ for (vxattr = vxattrs; vxattr->name; vxattr++)
+ size += vxattr->name_size;
+
+ return size;
+}
+
+/* Routines called at initialization and exit time */
+
+void __init ceph_xattr_init(void)
+{
+ ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
+ ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
+}
+
+void ceph_xattr_exit(void)
+{
+ ceph_dir_vxattrs_name_size = 0;
+ ceph_file_vxattrs_name_size = 0;
+}
+
+static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
const char *name)
{
- do {
- if (strcmp(vxattr->name, name) == 0)
- return vxattr;
- vxattr++;
- } while (vxattr->name);
+ struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
+
+ if (vxattr) {
+ while (vxattr->name) {
+ if (!strcmp(vxattr->name, name))
+ return vxattr;
+ vxattr++;
+ }
+ }
+
return NULL;
}
@@ -502,17 +573,15 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
{
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
int err;
struct ceph_inode_xattr *xattr;
- struct ceph_vxattr_cb *vxattr = NULL;
+ struct ceph_vxattr *vxattr = NULL;
if (!ceph_is_valid_xattr(name))
return -ENODATA;
/* let's see if a virtual xattr was requested */
- if (vxattrs)
- vxattr = ceph_match_vxattr(vxattrs, name);
+ vxattr = ceph_match_vxattr(inode, name);
spin_lock(&ci->i_ceph_lock);
dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
@@ -568,7 +637,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
{
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
+ struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
u32 vir_namelen = 0;
u32 namelen;
int err;
@@ -596,11 +665,12 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
goto out;
list_xattr:
- vir_namelen = 0;
- /* include virtual dir xattrs */
- if (vxattrs)
- for (i = 0; vxattrs[i].name; i++)
- vir_namelen += strlen(vxattrs[i].name) + 1;
+ /*
+ * Start with virtual dir xattr names (if any) (including
+ * terminating '\0' characters for each).
+ */
+ vir_namelen = ceph_vxattrs_name_size(vxattrs);
+
/* adding 1 byte per each variable due to the null termination */
namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count;
err = -ERANGE;
@@ -698,17 +768,17 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
+ struct ceph_vxattr *vxattr;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
+ int issued;
int err;
+ int dirty;
int name_len = strlen(name);
int val_len = size;
char *newname = NULL;
char *newval = NULL;
struct ceph_inode_xattr *xattr = NULL;
- int issued;
int required_blob_size;
- int dirty;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -716,12 +786,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if (!ceph_is_valid_xattr(name))
return -EOPNOTSUPP;
- if (vxattrs) {
- struct ceph_vxattr_cb *vxattr =
- ceph_match_vxattr(vxattrs, name);
- if (vxattr && vxattr->readonly)
- return -EOPNOTSUPP;
- }
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && vxattr->readonly)
+ return -EOPNOTSUPP;
/* preallocate memory for xattr name, value, index node */
err = -ENOMEM;
@@ -730,11 +797,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
goto out;
if (val_len) {
- newval = kmalloc(val_len + 1, GFP_NOFS);
+ newval = kmemdup(value, val_len, GFP_NOFS);
if (!newval)
goto out;
- memcpy(newval, value, val_len);
- newval[val_len] = '\0';
}
xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
@@ -744,6 +809,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
spin_lock(&ci->i_ceph_lock);
retry:
issued = __ceph_caps_issued(ci, NULL);
+ dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
if (!(issued & CEPH_CAP_XATTR_EXCL))
goto do_sync;
__build_xattrs(inode);
@@ -752,7 +818,7 @@ retry:
if (!ci->i_xattrs.prealloc_blob ||
required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
- struct ceph_buffer *blob = NULL;
+ struct ceph_buffer *blob;
spin_unlock(&ci->i_ceph_lock);
dout(" preaallocating new blob size=%d\n", required_blob_size);
@@ -766,12 +832,13 @@ retry:
goto retry;
}
- dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
err = __set_xattr(ci, newname, name_len, newval,
val_len, 1, 1, 1, &xattr);
+
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
+
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
@@ -816,8 +883,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
int ceph_removexattr(struct dentry *dentry, const char *name)
{
struct inode *inode = dentry->d_inode;
+ struct ceph_vxattr *vxattr;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
int issued;
int err;
int required_blob_size;
@@ -829,22 +896,19 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
if (!ceph_is_valid_xattr(name))
return -EOPNOTSUPP;
- if (vxattrs) {
- struct ceph_vxattr_cb *vxattr =
- ceph_match_vxattr(vxattrs, name);
- if (vxattr && vxattr->readonly)
- return -EOPNOTSUPP;
- }
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && vxattr->readonly)
+ return -EOPNOTSUPP;
err = -ENOMEM;
spin_lock(&ci->i_ceph_lock);
- __build_xattrs(inode);
retry:
issued = __ceph_caps_issued(ci, NULL);
dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
if (!(issued & CEPH_CAP_XATTR_EXCL))
goto do_sync;
+ __build_xattrs(inode);
required_blob_size = __get_required_blob_size(ci, 0, 0);
@@ -865,10 +929,10 @@ retry:
}
err = __remove_xattr_by_name(ceph_inode(inode), name);
+
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
-
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 05156c17b551..2870597b5c9d 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,7 +21,6 @@
#include <linux/vfs.h>
#include <linux/slab.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/fs.h>
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 8f616e0e252c..761d5b31b18d 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -38,7 +38,6 @@
#include <linux/mutex.h>
#include <linux/device.h>
#include <asm/io.h>
-#include <asm/system.h>
#include <asm/poll.h>
#include <asm/uaccess.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 9727e0c52579..0c68fd31fbf2 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -14,7 +14,6 @@
* improvements to the Coda project. Contact Peter Braam <coda@cs.cmu.edu>.
*/
-#include <asm/system.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/types.h>
diff --git a/fs/compat.c b/fs/compat.c
index 14483a715bbb..f2944ace7a7b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1170,10 +1170,9 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
}
asmlinkage ssize_t
-compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen, u32 pos_low, u32 pos_high)
+compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t pos)
{
- loff_t pos = ((loff_t)pos_high << 32) | pos_low;
struct file *file;
int fput_needed;
ssize_t ret;
@@ -1190,6 +1189,14 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
return ret;
}
+asmlinkage ssize_t
+compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
+ unsigned long vlen, u32 pos_low, u32 pos_high)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+ return compat_sys_preadv64(fd, vec, vlen, pos);
+}
+
static size_t compat_writev(struct file *file,
const struct compat_iovec __user *vec,
unsigned long vlen, loff_t *pos)
@@ -1229,10 +1236,9 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
}
asmlinkage ssize_t
-compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen, u32 pos_low, u32 pos_high)
+compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t pos)
{
- loff_t pos = ((loff_t)pos_high << 32) | pos_low;
struct file *file;
int fput_needed;
ssize_t ret;
@@ -1249,6 +1255,14 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
return ret;
}
+asmlinkage ssize_t
+compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
+ unsigned long vlen, u32 pos_low, u32 pos_high)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+ return compat_sys_pwritev64(fd, vec, vlen, pos);
+}
+
asmlinkage long
compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
unsigned int nr_segs, unsigned int flags)
diff --git a/fs/dcache.c b/fs/dcache.c
index e9a07b2a0948..b60ddc41d783 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2404,6 +2404,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
if (d_ancestor(alias, dentry)) {
/* Check for loops */
actual = ERR_PTR(-ELOOP);
+ spin_unlock(&inode->i_lock);
} else if (IS_ROOT(alias)) {
/* Is this an anonymous mountpoint that we
* could splice into our tree? */
@@ -2413,7 +2414,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
goto found;
} else {
/* Nope, but we must(!) avoid directory
- * aliasing */
+ * aliasing. This drops inode->i_lock */
actual = __d_unalias(inode, dentry, alias);
}
write_sequnlock(&rename_lock);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 629e9ed99d0f..739b0985b398 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,7 +34,6 @@
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/mman.h>
#include <linux/atomic.h>
diff --git a/fs/exec.c b/fs/exec.c
index 23559c227d9c..9a1d9f0a60ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,6 +59,7 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
+#include <asm/exec.h>
#include <trace/events/task.h>
#include "internal.h"
@@ -1027,10 +1028,10 @@ static void flush_old_files(struct files_struct * files)
fdt = files_fdtable(files);
if (i >= fdt->max_fds)
break;
- set = fdt->close_on_exec->fds_bits[j];
+ set = fdt->close_on_exec[j];
if (!set)
continue;
- fdt->close_on_exec->fds_bits[j] = 0;
+ fdt->close_on_exec[j] = 0;
spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
@@ -2066,8 +2067,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
fd_install(0, rp);
spin_lock(&cf->file_lock);
fdt = files_fdtable(cf);
- FD_SET(0, fdt->open_fds);
- FD_CLR(0, fdt->close_on_exec);
+ __set_open_fd(0, fdt);
+ __clear_close_on_exec(0, fdt);
spin_unlock(&cf->file_lock);
/* and disallow core files too */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 7f2b590a36b7..735ca06430ac 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -389,7 +389,7 @@ static int exofs_sync_fs(struct super_block *sb, int wait)
ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
memset(fscb, 0, ios->length);
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
- fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
+ fscb->s_numfiles = cpu_to_le64(sbi->s_numfiles);
fscb->s_magic = cpu_to_le16(sb->s_magic);
fscb->s_newfs = 0;
fscb->s_version = EXOFS_FSCB_VER;
@@ -529,7 +529,8 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
struct osd_dev_info *odi)
{
odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
- memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
+ if (likely(odi->systemid_len))
+ memcpy(odi->systemid, dt_dev->systemid, OSD_SYSTEMID_LEN);
odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
odi->osdname = dt_dev->osdname;
@@ -565,7 +566,7 @@ int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
if (unlikely(!aoded)) {
- EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
+ EXOFS_ERR("ERROR: failed allocating Device array[%d]\n",
numdevs);
return -ENOMEM;
}
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a2038928f9a3..1e036b79384c 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1743,8 +1743,11 @@ allocated:
*errp = 0;
brelse(bitmap_bh);
- dquot_free_block(inode, *count-num);
- *count = num;
+
+ if (num < *count) {
+ dquot_free_block(inode, *count-num);
+ *count = num;
+ }
trace_ext3_allocate_blocks(inode, goal, num,
(unsigned long long)ret_block);
@@ -1970,7 +1973,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
sbi = EXT3_SB(sb);
/* Walk through the whole group */
- while (start < max) {
+ while (start <= max) {
start = bitmap_search_next_usable_block(start, bitmap_bh, max);
if (start < 0)
break;
@@ -1980,7 +1983,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
* Allocate contiguous free extents by setting bits in the
* block bitmap
*/
- while (next < max
+ while (next <= max
&& claim_block(sb_bgl_lock(sbi, group),
next, bitmap_bh)) {
next++;
@@ -2091,73 +2094,74 @@ err_out:
*/
int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
- ext3_grpblk_t last_block, first_block, free_blocks;
- unsigned long first_group, last_group;
- unsigned long group, ngroups;
+ ext3_grpblk_t last_block, first_block;
+ unsigned long group, first_group, last_group;
struct ext3_group_desc *gdp;
struct ext3_super_block *es = EXT3_SB(sb)->s_es;
- uint64_t start, len, minlen, trimmed;
+ uint64_t start, minlen, end, trimmed = 0;
+ ext3_fsblk_t first_data_blk =
+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
int ret = 0;
- start = (range->start >> sb->s_blocksize_bits) +
- le32_to_cpu(es->s_first_data_block);
- len = range->len >> sb->s_blocksize_bits;
+ start = range->start >> sb->s_blocksize_bits;
+ end = start + (range->len >> sb->s_blocksize_bits) - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
- trimmed = 0;
- if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
+ if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
+ unlikely(start >= max_blks))
return -EINVAL;
- if (start >= max_blks)
- return -EINVAL;
- if (start + len > max_blks)
- len = max_blks - start;
+ if (end >= max_blks)
+ end = max_blks - 1;
+ if (end <= first_data_blk)
+ goto out;
+ if (start < first_data_blk)
+ start = first_data_blk;
- ngroups = EXT3_SB(sb)->s_groups_count;
smp_rmb();
/* Determine first and last group to examine based on start and len */
ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
&first_group, &first_block);
- ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
+ ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end,
&last_group, &last_block);
- last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
- last_block = EXT3_BLOCKS_PER_GROUP(sb);
- if (first_group > last_group)
- return -EINVAL;
+ /* end now represents the last block to discard in this group */
+ end = EXT3_BLOCKS_PER_GROUP(sb) - 1;
for (group = first_group; group <= last_group; group++) {
gdp = ext3_get_group_desc(sb, group, NULL);
if (!gdp)
break;
- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
- if (free_blocks < minlen)
- continue;
-
/*
* For all the groups except the last one, last block will
- * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to
- * change it for the last group in which case first_block +
- * len < EXT3_BLOCKS_PER_GROUP(sb).
+ * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to
+ * change it for the last group, note that last_block is
+ * already computed earlier by ext3_get_group_no_and_offset()
*/
- if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
- last_block = first_block + len;
- len -= last_block - first_block;
+ if (group == last_group)
+ end = last_block;
- ret = ext3_trim_all_free(sb, group, first_block,
- last_block, minlen);
- if (ret < 0)
- break;
+ if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) {
+ ret = ext3_trim_all_free(sb, group, first_block,
+ end, minlen);
+ if (ret < 0)
+ break;
+ trimmed += ret;
+ }
- trimmed += ret;
+ /*
+ * For every group except the first one, we are sure
+ * that the first block to discard will be block #0.
+ */
first_block = 0;
}
- if (ret >= 0)
+ if (ret > 0)
ret = 0;
- range->len = trimmed * sb->s_blocksize;
+out:
+ range->len = trimmed * sb->s_blocksize;
return ret;
}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2d0afeca0b47..6d3418662b54 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -756,6 +756,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
struct ext3_block_alloc_info *block_i;
ext3_fsblk_t current_block;
struct ext3_inode_info *ei = EXT3_I(inode);
+ struct timespec now;
block_i = ei->i_block_alloc_info;
/*
@@ -795,9 +796,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
}
/* We are done with atomic stuff, now do the rest of housekeeping */
-
- inode->i_ctime = CURRENT_TIME_SEC;
- ext3_mark_inode_dirty(handle, inode);
+ now = CURRENT_TIME_SEC;
+ if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) {
+ inode->i_ctime = now;
+ ext3_mark_inode_dirty(handle, inode);
+ }
/* ext3_mark_inode_dirty already updated i_sync_tid */
atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f9e2cd8cf711..4bbd07a6fa18 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -336,10 +336,10 @@ err_out:
* Return buffer_head on success or NULL in case of failure.
*/
struct buffer_head *
-ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh;
ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
bitmap_blk = ext4_block_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
- ext4_error(sb, "Cannot read block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, bitmap_blk);
+ ext4_error(sb, "Cannot get buffer for block bitmap - "
+ "block_group = %u, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
/*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
+ * submit the buffer_head for reading
*/
+ set_buffer_new(bh);
trace_ext4_read_block_bitmap_load(sb, block_group);
- set_bitmap_uptodate(bh);
- if (bh_submit_read(bh) < 0) {
- put_bh(bh);
+ bh->b_end_io = ext4_end_bitmap_read;
+ get_bh(bh);
+ submit_bh(READ, bh);
+ return bh;
+}
+
+/* Returns 0 on success, 1 on error */
+int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ struct ext4_group_desc *desc;
+
+ if (!buffer_new(bh))
+ return 0;
+ desc = ext4_get_group_desc(sb, block_group, NULL);
+ if (!desc)
+ return 1;
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
ext4_error(sb, "Cannot read block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, bitmap_blk);
- return NULL;
+ "block_group = %u, block_bitmap = %llu",
+ block_group, (unsigned long long) bh->b_blocknr);
+ return 1;
}
+ clear_buffer_new(bh);
+ /* Panic or remount fs read-only if block bitmap is invalid */
ext4_valid_block_bitmap(sb, desc, block_group, bh);
- /*
- * file system mounted not to panic on error,
- * continue with corrupt bitmap
- */
+ return 0;
+}
+
+struct buffer_head *
+ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+{
+ struct buffer_head *bh;
+
+ bh = ext4_read_block_bitmap_nowait(sb, block_group);
+ if (ext4_wait_block_bitmap(sb, block_group, bh)) {
+ put_bh(bh);
+ return NULL;
+ }
return bh;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 164c56092e58..b86786202643 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int ext4_readdir(struct file *, void *, filldir_t);
static int ext4_dx_readdir(struct file *filp,
void *dirent, filldir_t filldir);
-static int ext4_release_dir(struct inode *inode,
- struct file *filp);
-
-const struct file_operations ext4_dir_operations = {
- .llseek = ext4_llseek,
- .read = generic_read_dir,
- .readdir = ext4_readdir, /* we take BKL. needed?*/
- .unlocked_ioctl = ext4_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext4_compat_ioctl,
-#endif
- .fsync = ext4_sync_file,
- .release = ext4_release_dir,
-};
-
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext4_filetype_table[filetype]);
}
+/**
+ * Check if the given dir-inode refers to an htree-indexed directory
+ * (or a directory which chould potentially get coverted to use htree
+ * indexing).
+ *
+ * Return 1 if it is a dx dir, 0 if not
+ */
+static int is_dx_dir(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_COMPAT_DIR_INDEX) &&
+ ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
+ ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+ return 1;
+
+ return 0;
+}
+
/*
* Return 0 if the directory entry is OK, and 1 if there is a problem
*
@@ -91,17 +95,17 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
return 0;
if (filp)
- ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
+ ext4_error_file(filp, function, line, bh->b_blocknr,
"bad entry in directory: %s - offset=%u(%u), "
"inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset%bh->b_size),
+ error_msg, (unsigned) (offset % bh->b_size),
offset, le32_to_cpu(de->inode),
rlen, de->name_len);
else
- ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
+ ext4_error_inode(dir, function, line, bh->b_blocknr,
"bad entry in directory: %s - offset=%u(%u), "
"inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset%bh->b_size),
+ error_msg, (unsigned) (offset % bh->b_size),
offset, le32_to_cpu(de->inode),
rlen, de->name_len);
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
unsigned int offset;
int i, stored;
struct ext4_dir_entry_2 *de;
- struct super_block *sb;
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
int ret = 0;
int dir_has_error = 0;
- sb = inode->i_sb;
-
- if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_COMPAT_DIR_INDEX) &&
- ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
- ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
+ if (is_dx_dir(inode)) {
err = ext4_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
ret = err;
@@ -254,22 +253,134 @@ out:
return ret;
}
+static inline int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+ return is_compat_task();
+#else
+ return (BITS_PER_LONG == 32);
+#endif
+}
+
/*
* These functions convert from the major/minor hash to an f_pos
- * value.
+ * value for dx directories
+ *
+ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
+ * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
+ * directly on both 32-bit and 64-bit nodes, under such case, neither
+ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
+ */
+static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return major >> 1;
+ else
+ return ((__u64)(major >> 1) << 32) | (__u64)minor;
+}
+
+static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return (pos << 1) & 0xffffffff;
+ else
+ return ((pos >> 32) << 1) & 0xffffffff;
+}
+
+static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return 0;
+ else
+ return pos & 0xffffffff;
+}
+
+/*
+ * Return 32- or 64-bit end-of-file for dx directories
+ */
+static inline loff_t ext4_get_htree_eof(struct file *filp)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return EXT4_HTREE_EOF_32BIT;
+ else
+ return EXT4_HTREE_EOF_64BIT;
+}
+
+
+/*
+ * ext4_dir_llseek() based on generic_file_llseek() to handle both
+ * non-htree and htree directories, where the "offset" is in terms
+ * of the filename hash value instead of the byte offset.
*
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
- * lseek/telldir/seekdir will blow out spectacularly, and from within
- * the ext2 low-level routine, we don't know if we're being called by
- * a 64-bit version of the system call or the 32-bit version of the
- * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
- * cookie. Sigh.
+ * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
+ * will be invalid once the directory was converted into a dx directory
*/
-#define hash2pos(major, minor) (major >> 1)
-#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
-#define pos2min_hash(pos) (0)
+loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t ret = -EINVAL;
+ int dx_dir = is_dx_dir(inode);
+
+ mutex_lock(&inode->i_mutex);
+
+ /* NOTE: relative offsets with dx directories might not work
+ * as expected, as it is difficult to figure out the
+ * correct offset between dx hashes */
+
+ switch (origin) {
+ case SEEK_END:
+ if (unlikely(offset > 0))
+ goto out_err; /* not supported for directories */
+
+ /* so only negative offsets are left, does that have a
+ * meaning for directories at all? */
+ if (dx_dir)
+ offset += ext4_get_htree_eof(file);
+ else
+ offset += inode->i_size;
+ break;
+ case SEEK_CUR:
+ /*
+ * Here we special-case the lseek(fd, 0, SEEK_CUR)
+ * position-querying operation. Avoid rewriting the "same"
+ * f_pos value back to the file because a concurrent read(),
+ * write() or lseek() might have altered it
+ */
+ if (offset == 0) {
+ offset = file->f_pos;
+ goto out_ok;
+ }
+
+ offset += file->f_pos;
+ break;
+ }
+
+ if (unlikely(offset < 0))
+ goto out_err;
+
+ if (!dx_dir) {
+ if (offset > inode->i_sb->s_maxbytes)
+ goto out_err;
+ } else if (offset > ext4_get_htree_eof(file))
+ goto out_err;
+
+ /* Special lock needed here? */
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+
+out_ok:
+ ret = offset;
+out_err:
+ mutex_unlock(&inode->i_mutex);
+
+ return ret;
+}
/*
* This structure holds the nodes of the red-black tree used to store
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
}
-static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
+static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
+ loff_t pos)
{
struct dir_private_info *p;
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->curr_hash = pos2maj_hash(pos);
- p->curr_minor_hash = pos2min_hash(pos);
+ p->curr_hash = pos2maj_hash(filp, pos);
+ p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
}
@@ -425,11 +537,12 @@ static int call_filldir(struct file *filp, void *dirent,
sb = inode->i_sb;
if (!fname) {
- printk(KERN_ERR "EXT4-fs: call_filldir: called with "
- "null fname?!?\n");
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
+ "called with null fname?!?", __func__, __LINE__,
+ inode->i_ino, current->comm);
return 0;
}
- curr_pos = hash2pos(fname->hash, fname->minor_hash);
+ curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
while (fname) {
error = filldir(dirent, fname->name,
fname->name_len, curr_pos,
@@ -454,13 +567,13 @@ static int ext4_dx_readdir(struct file *filp,
int ret;
if (!info) {
- info = ext4_htree_create_dir_info(filp->f_pos);
+ info = ext4_htree_create_dir_info(filp, filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
}
- if (filp->f_pos == EXT4_HTREE_EOF)
+ if (filp->f_pos == ext4_get_htree_eof(filp))
return 0; /* EOF */
/* Some one has messed with f_pos; reset the world */
@@ -468,8 +581,8 @@ static int ext4_dx_readdir(struct file *filp,
free_rb_tree_fname(&info->root);
info->curr_node = NULL;
info->extra_fname = NULL;
- info->curr_hash = pos2maj_hash(filp->f_pos);
- info->curr_minor_hash = pos2min_hash(filp->f_pos);
+ info->curr_hash = pos2maj_hash(filp, filp->f_pos);
+ info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
}
/*
@@ -501,7 +614,7 @@ static int ext4_dx_readdir(struct file *filp,
if (ret < 0)
return ret;
if (ret == 0) {
- filp->f_pos = EXT4_HTREE_EOF;
+ filp->f_pos = ext4_get_htree_eof(filp);
break;
}
info->curr_node = rb_first(&info->root);
@@ -521,7 +634,7 @@ static int ext4_dx_readdir(struct file *filp,
info->curr_minor_hash = fname->minor_hash;
} else {
if (info->next_hash == ~0) {
- filp->f_pos = EXT4_HTREE_EOF;
+ filp->f_pos = ext4_get_htree_eof(filp);
break;
}
info->curr_hash = info->next_hash;
@@ -540,3 +653,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
return 0;
}
+
+const struct file_operations ext4_dir_operations = {
+ .llseek = ext4_dir_llseek,
+ .read = generic_read_dir,
+ .readdir = ext4_readdir,
+ .unlocked_ioctl = ext4_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext4_compat_ioctl,
+#endif
+ .fsync = ext4_sync_file,
+ .release = ext4_release_dir,
+};
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 513004fc3d84..ab2594a30f86 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -53,7 +53,7 @@
printk(KERN_DEBUG f, ## a); \
} while (0)
#else
-#define ext4_debug(f, a...) do {} while (0)
+#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \
@@ -184,6 +184,8 @@ struct mpage_da_data {
#define EXT4_IO_END_UNWRITTEN 0x0001
#define EXT4_IO_END_ERROR 0x0002
#define EXT4_IO_END_QUEUED 0x0004
+#define EXT4_IO_END_DIRECT 0x0008
+#define EXT4_IO_END_IN_FSYNC 0x0010
struct ext4_io_page {
struct page *p_page;
@@ -192,18 +194,25 @@ struct ext4_io_page {
#define MAX_IO_PAGES 128
+/*
+ * For converting uninitialized extents on a work queue.
+ *
+ * 'page' is only used from the writepage() path; 'pages' is only used for
+ * buffered writes; they are used to keep page references until conversion
+ * takes place. For AIO/DIO, neither field is filled in.
+ */
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
- struct page *page; /* page struct for buffer write */
+ struct page *page; /* for writepage() path */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
- int num_io_pages;
- struct ext4_io_page *pages[MAX_IO_PAGES];
+ int num_io_pages; /* for writepages() */
+ struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
} ext4_io_end_t;
struct ext4_io_submit {
@@ -923,6 +932,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
+#define EXT4_MOUNT_ERRORS_MASK 0x00070
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
@@ -941,7 +951,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
-#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
@@ -1142,6 +1151,7 @@ struct ext4_sb_info {
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
+ unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
gid_t s_resgid;
@@ -1420,8 +1430,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
-#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
+#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
+#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1612,7 +1623,11 @@ struct dx_hash_info
u32 *seed;
};
-#define EXT4_HTREE_EOF 0x7fffffff
+
+/* 32 and 64 bit signed EOF for dx directories */
+#define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
+#define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
+
/*
* Control parameters used by ext4_htree_next_block
@@ -1794,8 +1809,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
- ext4_group_t block_group);
+
+extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
+ ext4_group_t block_group);
+extern int ext4_wait_block_bitmap(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh);
+extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+ ext4_group_t block_group);
extern void ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
@@ -1841,6 +1862,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
+extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
extern long ext4_mb_stats;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a52db3a69a30..0f58b86e3a02 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -47,9 +47,9 @@
*/
#define EXT_DEBUG__
#ifdef EXT_DEBUG
-#define ext_debug(a...) printk(a)
+#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define ext_debug(a...)
+#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
/*
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 5802fa1dab18..83b20fcf9400 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -104,6 +104,78 @@
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+/**
+ * struct ext4_journal_cb_entry - Base structure for callback information.
+ *
+ * This struct is a 'seed' structure for a using with your own callback
+ * structs. If you are using callbacks you must allocate one of these
+ * or another struct of your own definition which has this struct
+ * as it's first element and pass it to ext4_journal_callback_add().
+ */
+struct ext4_journal_cb_entry {
+ /* list information for other callbacks attached to the same handle */
+ struct list_head jce_list;
+
+ /* Function to call with this callback structure */
+ void (*jce_func)(struct super_block *sb,
+ struct ext4_journal_cb_entry *jce, int error);
+
+ /* user data goes here */
+};
+
+/**
+ * ext4_journal_callback_add: add a function to call after transaction commit
+ * @handle: active journal transaction handle to register callback on
+ * @func: callback function to call after the transaction has committed:
+ * @sb: superblock of current filesystem for transaction
+ * @jce: returned journal callback data
+ * @rc: journal state at commit (0 = transaction committed properly)
+ * @jce: journal callback data (internal and function private data struct)
+ *
+ * The registered function will be called in the context of the journal thread
+ * after the transaction for which the handle was created has completed.
+ *
+ * No locks are held when the callback function is called, so it is safe to
+ * call blocking functions from within the callback, but the callback should
+ * not block or run for too long, or the filesystem will be blocked waiting for
+ * the next transaction to commit. No journaling functions can be used, or
+ * there is a risk of deadlock.
+ *
+ * There is no guaranteed calling order of multiple registered callbacks on
+ * the same transaction.
+ */
+static inline void ext4_journal_callback_add(handle_t *handle,
+ void (*func)(struct super_block *sb,
+ struct ext4_journal_cb_entry *jce,
+ int rc),
+ struct ext4_journal_cb_entry *jce)
+{
+ struct ext4_sb_info *sbi =
+ EXT4_SB(handle->h_transaction->t_journal->j_private);
+
+ /* Add the jce to transaction's private list */
+ jce->jce_func = func;
+ spin_lock(&sbi->s_md_lock);
+ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
+ spin_unlock(&sbi->s_md_lock);
+}
+
+/**
+ * ext4_journal_callback_del: delete a registered callback
+ * @handle: active journal transaction handle on which callback was registered
+ * @jce: registered journal callback entry to unregister
+ */
+static inline void ext4_journal_callback_del(handle_t *handle,
+ struct ext4_journal_cb_entry *jce)
+{
+ struct ext4_sb_info *sbi =
+ EXT4_SB(handle->h_transaction->t_journal->j_private);
+
+ spin_lock(&sbi->s_md_lock);
+ list_del_init(&jce->jce_list);
+ spin_unlock(&sbi->s_md_lock);
+}
+
int
ext4_mark_iloc_dirty(handle_t *handle,
struct inode *inode,
@@ -261,43 +333,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle,
/* super.c */
int ext4_force_commit(struct super_block *sb);
-static inline int ext4_should_journal_data(struct inode *inode)
+/*
+ * Ext4 inode journal modes
+ */
+#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */
+#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
+#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
+
+static inline int ext4_inode_journal_mode(struct inode *inode)
{
if (EXT4_JOURNAL(inode) == NULL)
- return 0;
- if (!S_ISREG(inode->i_mode))
- return 1;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- return 1;
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return 1;
- return 0;
+ return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
+ /* We do not support data journalling with delayed allocation */
+ if (!S_ISREG(inode->i_mode) ||
+ test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+ !test_opt(inode->i_sb, DELALLOC))
+ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+ return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
+ else
+ BUG();
+}
+
+static inline int ext4_should_journal_data(struct inode *inode)
+{
+ return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
}
static inline int ext4_should_order_data(struct inode *inode)
{
- if (EXT4_JOURNAL(inode) == NULL)
- return 0;
- if (!S_ISREG(inode->i_mode))
- return 0;
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return 0;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- return 1;
- return 0;
+ return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
}
static inline int ext4_should_writeback_data(struct inode *inode)
{
- if (EXT4_JOURNAL(inode) == NULL)
- return 1;
- if (!S_ISREG(inode->i_mode))
- return 0;
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return 0;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
- return 1;
- return 0;
+ return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
}
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74f23c292e1b..1421938e6792 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,14 @@
#include <trace/events/ext4.h>
+/*
+ * used by extent splitting.
+ */
+#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
+ due to ENOSPC */
+#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
+#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
+
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path,
@@ -51,6 +59,13 @@ static int ext4_split_extent(handle_t *handle,
int split_flag,
int flags);
+static int ext4_split_extent_at(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t split,
+ int split_flag,
+ int flags);
+
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
@@ -300,6 +315,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ if (len == 0)
+ return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -2308,7 +2325,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
struct ext4_extent *ex;
/* the header must be checked already in ext4_ext_remove_space() */
- ext_debug("truncate since %u in leaf\n", start);
+ ext_debug("truncate since %u in leaf to %u\n", start, end);
if (!path[depth].p_hdr)
path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
eh = path[depth].p_hdr;
@@ -2343,14 +2360,17 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext_debug(" border %u:%u\n", a, b);
/* If this extent is beyond the end of the hole, skip it */
- if (end <= ex_ee_block) {
+ if (end < ex_ee_block) {
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
continue;
} else if (b != ex_ee_block + ex_ee_len - 1) {
- EXT4_ERROR_INODE(inode," bad truncate %u:%u\n",
- start, end);
+ EXT4_ERROR_INODE(inode,
+ "can not handle truncate %u:%u "
+ "on extent %u:%u",
+ start, end, ex_ee_block,
+ ex_ee_block + ex_ee_len - 1);
err = -EIO;
goto out;
} else if (a != ex_ee_block) {
@@ -2482,7 +2502,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
return 1;
}
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
+static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+ ext4_lblk_t end)
{
struct super_block *sb = inode->i_sb;
int depth = ext_depth(inode);
@@ -2491,7 +2512,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
handle_t *handle;
int i, err;
- ext_debug("truncate since %u\n", start);
+ ext_debug("truncate since %u to %u\n", start, end);
/* probably first extent we're gonna free will be last in block */
handle = ext4_journal_start(inode, depth + 1);
@@ -2504,6 +2525,61 @@ again:
trace_ext4_ext_remove_space(inode, start, depth);
/*
+ * Check if we are removing extents inside the extent tree. If that
+ * is the case, we are going to punch a hole inside the extent tree
+ * so we have to check whether we need to split the extent covering
+ * the last block to remove so we can easily remove the part of it
+ * in ext4_ext_rm_leaf().
+ */
+ if (end < EXT_MAX_BLOCKS - 1) {
+ struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+
+ /* find extent for this block */
+ path = ext4_ext_find_extent(inode, end, NULL);
+ if (IS_ERR(path)) {
+ ext4_journal_stop(handle);
+ return PTR_ERR(path);
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex)
+ goto cont;
+
+ ee_block = le32_to_cpu(ex->ee_block);
+
+ /*
+ * See if the last block is inside the extent, if so split
+ * the extent at 'end' block so we can easily remove the
+ * tail of the first part of the split extent in
+ * ext4_ext_rm_leaf().
+ */
+ if (end >= ee_block &&
+ end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
+ int split_flag = 0;
+
+ if (ext4_ext_is_uninitialized(ex))
+ split_flag = EXT4_EXT_MARK_UNINIT1 |
+ EXT4_EXT_MARK_UNINIT2;
+
+ /*
+ * Split the extent in two so that 'end' is the last
+ * block in the first new extent
+ */
+ err = ext4_split_extent_at(handle, inode, path,
+ end + 1, split_flag,
+ EXT4_GET_BLOCKS_PRE_IO |
+ EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
+
+ if (err < 0)
+ goto out;
+ }
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+cont:
+
+ /*
* We start scanning from right side, freeing all the blocks
* after i_size and walking into the tree depth-wise.
*/
@@ -2515,6 +2591,7 @@ again:
}
path[0].p_depth = depth;
path[0].p_hdr = ext_inode_hdr(inode);
+
if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
err = -EIO;
goto out;
@@ -2526,7 +2603,7 @@ again:
/* this is leaf block */
err = ext4_ext_rm_leaf(handle, inode, path,
&partial_cluster, start,
- EXT_MAX_BLOCKS - 1);
+ end);
/* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
path[i].p_bh = NULL;
@@ -2651,17 +2728,17 @@ void ext4_ext_init(struct super_block *sb)
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
- printk(KERN_INFO "EXT4-fs: file extents enabled");
+ printk(KERN_INFO "EXT4-fs: file extents enabled"
#ifdef AGGRESSIVE_TEST
- printk(", aggressive tests");
+ ", aggressive tests"
#endif
#ifdef CHECK_BINSEARCH
- printk(", check binsearch");
+ ", check binsearch"
#endif
#ifdef EXTENTS_STATS
- printk(", stats");
+ ", stats"
#endif
- printk("\n");
+ "\n");
#endif
#ifdef EXTENTS_STATS
spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
@@ -2709,14 +2786,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
}
/*
- * used by extent splitting.
- */
-#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
- due to ENOSPC */
-#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
-#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
-
-/*
* ext4_split_extent_at() splits an extent at given block.
*
* @handle: the journal handle
@@ -3224,11 +3293,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
depth = ext_depth(inode);
eh = path[depth].p_hdr;
- if (unlikely(!eh->eh_entries)) {
- EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
- "EOFBLOCKS_FL set");
- return -EIO;
- }
+ /*
+ * We're going to remove EOFBLOCKS_FL entirely in future so we
+ * do not care for this case anymore. Simply remove the flag
+ * if there are no extents.
+ */
+ if (unlikely(!eh->eh_entries))
+ goto out;
last_ex = EXT_LAST_EXTENT(eh);
/*
* We should clear the EOFBLOCKS_FL flag if we are writing the
@@ -3252,6 +3323,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
for (i = depth-1; i >= 0; i--)
if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
return 0;
+out:
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
return ext4_mark_inode_dirty(handle, inode);
}
@@ -3710,8 +3782,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
int free_on_err = 0, err = 0, depth, ret;
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
- unsigned int punched_out = 0;
- unsigned int result = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
ext4_lblk_t cluster_offset;
@@ -3721,8 +3791,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
/* check in cache */
- if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
- ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+ if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
if (!newex.ee_start_lo && !newex.ee_start_hi) {
if ((sbi->s_cluster_ratio > 1) &&
ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
@@ -3790,113 +3859,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* if found extent covers block, simply return it */
if (in_range(map->m_lblk, ee_block, ee_len)) {
- struct ext4_map_blocks punch_map;
- ext4_fsblk_t partial_cluster = 0;
-
newblock = map->m_lblk - ee_block + ee_start;
/* number of remaining blocks in the extent */
allocated = ee_len - (map->m_lblk - ee_block);
ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
ee_block, ee_len, newblock);
- if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) {
- /*
- * Do not put uninitialized extent
- * in the cache
- */
- if (!ext4_ext_is_uninitialized(ex)) {
- ext4_ext_put_in_cache(inode, ee_block,
- ee_len, ee_start);
- goto out;
- }
- ret = ext4_ext_handle_uninitialized_extents(
- handle, inode, map, path, flags,
- allocated, newblock);
- return ret;
- }
-
- /*
- * Punch out the map length, but only to the
- * end of the extent
- */
- punched_out = allocated < map->m_len ?
- allocated : map->m_len;
-
/*
- * Sense extents need to be converted to
- * uninitialized, they must fit in an
- * uninitialized extent
+ * Do not put uninitialized extent
+ * in the cache
*/
- if (punched_out > EXT_UNINIT_MAX_LEN)
- punched_out = EXT_UNINIT_MAX_LEN;
-
- punch_map.m_lblk = map->m_lblk;
- punch_map.m_pblk = newblock;
- punch_map.m_len = punched_out;
- punch_map.m_flags = 0;
-
- /* Check to see if the extent needs to be split */
- if (punch_map.m_len != ee_len ||
- punch_map.m_lblk != ee_block) {
-
- ret = ext4_split_extent(handle, inode,
- path, &punch_map, 0,
- EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
- EXT4_GET_BLOCKS_PRE_IO);
-
- if (ret < 0) {
- err = ret;
- goto out2;
- }
- /*
- * find extent for the block at
- * the start of the hole
- */
- ext4_ext_drop_refs(path);
- kfree(path);
-
- path = ext4_ext_find_extent(inode,
- map->m_lblk, NULL);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- path = NULL;
- goto out2;
- }
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
- ee_block = le32_to_cpu(ex->ee_block);
- ee_start = ext4_ext_pblock(ex);
-
- }
-
- ext4_ext_mark_uninitialized(ex);
-
- ext4_ext_invalidate_cache(inode);
-
- err = ext4_ext_rm_leaf(handle, inode, path,
- &partial_cluster, map->m_lblk,
- map->m_lblk + punched_out);
-
- if (!err && path->p_hdr->eh_entries == 0) {
- /*
- * Punch hole freed all of this sub tree,
- * so we need to correct eh_depth
- */
- err = ext4_ext_get_access(handle, inode, path);
- if (err == 0) {
- ext_inode_hdr(inode)->eh_depth = 0;
- ext_inode_hdr(inode)->eh_max =
- cpu_to_le16(ext4_ext_space_root(
- inode, 0));
-
- err = ext4_ext_dirty(
- handle, inode, path);
- }
+ if (!ext4_ext_is_uninitialized(ex)) {
+ ext4_ext_put_in_cache(inode, ee_block,
+ ee_len, ee_start);
+ goto out;
}
-
- goto out2;
+ ret = ext4_ext_handle_uninitialized_extents(
+ handle, inode, map, path, flags,
+ allocated, newblock);
+ return ret;
}
}
@@ -4165,13 +4146,11 @@ out2:
ext4_ext_drop_refs(path);
kfree(path);
}
- result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
- punched_out : allocated;
trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
- newblock, map->m_len, err ? err : result);
+ newblock, map->m_len, err ? err : allocated);
- return err ? err : result;
+ return err ? err : allocated;
}
void ext4_ext_truncate(struct inode *inode)
@@ -4228,7 +4207,7 @@ void ext4_ext_truncate(struct inode *inode)
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
- err = ext4_ext_remove_space(inode, last_block);
+ err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
/* In a multi-transaction truncate, we only make the final
* transaction synchronous.
@@ -4436,10 +4415,11 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
EXT4_GET_BLOCKS_IO_CONVERT_EXT);
if (ret <= 0) {
WARN_ON(ret <= 0);
- printk(KERN_ERR "%s: ext4_ext_map_blocks "
- "returned error inode#%lu, block=%u, "
- "max_blocks=%u", __func__,
- inode->i_ino, map.m_lblk, map.m_len);
+ ext4_msg(inode->i_sb, KERN_ERR,
+ "%s:%d: inode #%lu: block %u: len %u: "
+ "ext4_ext_map_blocks returned %d",
+ __func__, __LINE__, inode->i_ino, map.m_lblk,
+ map.m_len, ret);
}
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
@@ -4705,14 +4685,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
- struct ext4_ext_cache cache_ex;
- ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
+ ext4_lblk_t first_block, stop_block;
struct address_space *mapping = inode->i_mapping;
- struct ext4_map_blocks map;
handle_t *handle;
loff_t first_page, last_page, page_len;
loff_t first_page_offset, last_page_offset;
- int ret, credits, blocks_released, err = 0;
+ int credits, err = 0;
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
@@ -4728,10 +4706,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
offset;
}
- first_block = (offset + sb->s_blocksize - 1) >>
- EXT4_BLOCK_SIZE_BITS(sb);
- last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
last_page = (offset + length) >> PAGE_CACHE_SHIFT;
@@ -4810,7 +4784,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
}
}
-
/*
* If i_size is contained in the last page, we need to
* unmap and zero the partial page after i_size
@@ -4830,73 +4803,22 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
}
}
+ first_block = (offset + sb->s_blocksize - 1) >>
+ EXT4_BLOCK_SIZE_BITS(sb);
+ stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
/* If there are no blocks to remove, return now */
- if (first_block >= last_block)
+ if (first_block >= stop_block)
goto out;
down_write(&EXT4_I(inode)->i_data_sem);
ext4_ext_invalidate_cache(inode);
ext4_discard_preallocations(inode);
- /*
- * Loop over all the blocks and identify blocks
- * that need to be punched out
- */
- iblock = first_block;
- blocks_released = 0;
- while (iblock < last_block) {
- max_blocks = last_block - iblock;
- num_blocks = 1;
- memset(&map, 0, sizeof(map));
- map.m_lblk = iblock;
- map.m_len = max_blocks;
- ret = ext4_ext_map_blocks(handle, inode, &map,
- EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
-
- if (ret > 0) {
- blocks_released += ret;
- num_blocks = ret;
- } else if (ret == 0) {
- /*
- * If map blocks could not find the block,
- * then it is in a hole. If the hole was
- * not already cached, then map blocks should
- * put it in the cache. So we can get the hole
- * out of the cache
- */
- memset(&cache_ex, 0, sizeof(cache_ex));
- if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
- !cache_ex.ec_start) {
-
- /* The hole is cached */
- num_blocks = cache_ex.ec_block +
- cache_ex.ec_len - iblock;
-
- } else {
- /* The block could not be identified */
- err = -EIO;
- break;
- }
- } else {
- /* Map blocks error */
- err = ret;
- break;
- }
-
- if (num_blocks == 0) {
- /* This condition should never happen */
- ext_debug("Block lookup failed");
- err = -EIO;
- break;
- }
-
- iblock += num_blocks;
- }
+ err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
- if (blocks_released > 0) {
- ext4_ext_invalidate_cache(inode);
- ext4_discard_preallocations(inode);
- }
+ ext4_ext_invalidate_cache(inode);
+ ext4_discard_preallocations(inode);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 00a2cb753efd..bb6c7d811313 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode)
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
list_del_init(&io->list);
+ io->flag |= EXT4_IO_END_IN_FSYNC;
/*
* Calling ext4_end_io_nolock() to convert completed
* IO to written.
@@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode)
if (ret < 0)
ret2 = ret;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ io->flag &= ~EXT4_IO_END_IN_FSYNC;
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index ac8f168c8ab4..fa8e4911d354 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
return -1;
}
hash = hash & ~1;
- if (hash == (EXT4_HTREE_EOF << 1))
- hash = (EXT4_HTREE_EOF-1) << 1;
+ if (hash == (EXT4_HTREE_EOF_32BIT << 1))
+ hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
hinfo->hash = hash;
hinfo->minor_hash = minor_hash;
return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad9..409c2ee7750a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
return EXT4_INODES_PER_GROUP(sb);
}
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+ if (uptodate) {
+ set_buffer_uptodate(bh);
+ set_bitmap_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ put_bh(bh);
+}
+
/*
* Read the inode allocation bitmap for a given block_group, reading
* into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
/*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
+ * submit the buffer_head for reading
*/
trace_ext4_load_inode_bitmap(sb, block_group);
- set_bitmap_uptodate(bh);
- if (bh_submit_read(bh) < 0) {
+ bh->b_end_io = ext4_end_bitmap_read;
+ get_bh(bh);
+ submit_bh(READ, bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
put_bh(bh);
ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
+ "block_group = %u, inode_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
return bh;
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
- if (atomic_read(&inode->i_count) > 1) {
- printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ if (!sb) {
+ printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
+ "nonexistent device\n", __func__, __LINE__);
return;
}
- if (inode->i_nlink) {
- printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
- inode->i_nlink);
+ if (atomic_read(&inode->i_count) > 1) {
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+ __func__, __LINE__, inode->i_ino,
+ atomic_read(&inode->i_count));
return;
}
- if (!sb) {
- printk(KERN_ERR "ext4_free_inode: inode on "
- "nonexistent device\n");
+ if (inode->i_nlink) {
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+ __func__, __LINE__, inode->i_ino, inode->i_nlink);
return;
}
sbi = EXT4_SB(sb);
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
}
/*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
- struct buffer_head *inode_bitmap_bh,
- unsigned long ino, ext4_group_t group, umode_t mode)
-{
- int free = 0, retval = 0, count;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
- /*
- * We have to be sure that new inode allocation does not race with
- * inode table initialization, because otherwise we may end up
- * allocating and writing new inode right before sb_issue_zeroout
- * takes place and overwriting our new inode with zeroes. So we
- * take alloc_sem to prevent it.
- */
- down_read(&grp->alloc_sem);
- ext4_lock_group(sb, group);
- if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
- /* not a free inode */
- retval = 1;
- goto err_ret;
- }
- ino++;
- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
- ino > EXT4_INODES_PER_GROUP(sb)) {
- ext4_unlock_group(sb, group);
- up_read(&grp->alloc_sem);
- ext4_error(sb, "reserved inode or inode > inodes count - "
- "block_group = %u, inode=%lu", group,
- ino + group * EXT4_INODES_PER_GROUP(sb));
- return 1;
- }
- /* If we didn't allocate from within the initialized part of the inode
- * table then we need to initialize up to this inode. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
- /* When marking the block group with
- * ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unused even though
- * mke2fs could have initialized the same for us.
- * Instead we calculated the value below
- */
-
- free = 0;
- } else {
- free = EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp);
- }
-
- /*
- * Check the relative inode number against the last used
- * relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
- */
- if (ino > free)
- ext4_itable_unused_set(sb, gdp,
- (EXT4_INODES_PER_GROUP(sb) - ino));
- }
- count = ext4_free_inodes_count(sb, gdp) - 1;
- ext4_free_inodes_set(sb, gdp, count);
- if (S_ISDIR(mode)) {
- count = ext4_used_dirs_count(sb, gdp) + 1;
- ext4_used_dirs_set(sb, gdp, count);
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, group);
-
- atomic_inc(&sbi->s_flex_groups[f].used_dirs);
- }
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
- ext4_unlock_group(sb, group);
- up_read(&grp->alloc_sem);
- return retval;
-}
-
-/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
* free space and a low directory-to-inode ratio; if that fails, then of
@@ -741,6 +664,11 @@ got_group:
if (ret2 == -1)
goto out;
+ /*
+ * Normally we will only go through one pass of this loop,
+ * unless we get unlucky and it turns out the group we selected
+ * had its last inode grabbed by someone else.
+ */
for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO;
@@ -757,51 +685,24 @@ repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
EXT4_INODES_PER_GROUP(sb), ino);
-
- if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle,
- inode_bitmap_bh);
- if (err)
- goto fail;
-
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle,
- group_desc_bh);
- if (err)
- goto fail;
- if (!ext4_claim_inode(sb, inode_bitmap_bh,
- ino, group, mode)) {
- /* we won it */
- BUFFER_TRACE(inode_bitmap_bh,
- "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle,
- NULL,
- inode_bitmap_bh);
- if (err)
- goto fail;
- /* zero bit is inode number 1*/
- ino++;
- goto got;
- }
- /* we lost it */
- ext4_handle_release_buffer(handle, inode_bitmap_bh);
- ext4_handle_release_buffer(handle, group_desc_bh);
-
- if (++ino < EXT4_INODES_PER_GROUP(sb))
- goto repeat_in_this_group;
+ if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+ if (++group == ngroups)
+ group = 0;
+ continue;
}
-
- /*
- * This case is possible in concurrent environment. It is very
- * rare. We cannot repeat the find_group_xxx() call because
- * that will simply return the same blockgroup, because the
- * group descriptor metadata has not yet been updated.
- * So we just go onto the next blockgroup.
- */
- if (++group == ngroups)
- group = 0;
+ if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+ ext4_error(sb, "reserved inode found cleared - "
+ "inode=%lu", ino + 1);
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+ ext4_unlock_group(sb, group);
+ ino++; /* the inode bitmap is zero-based */
+ if (!ret2)
+ goto got; /* we grabbed the inode! */
+ if (ino < EXT4_INODES_PER_GROUP(sb))
+ goto repeat_in_this_group;
}
err = -ENOSPC;
goto out;
@@ -838,6 +739,59 @@ got:
if (err)
goto fail;
}
+
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err)
+ goto fail;
+
+ /* Update the relevant bg descriptor fields */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ int free;
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+ down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+ ext4_lock_group(sb, group); /* while we modify the bg desc */
+ free = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+ free = 0;
+ }
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. if it is greater
+ * we need to update the bg_itable_unused count
+ */
+ if (ino > free)
+ ext4_itable_unused_set(sb, gdp,
+ (EXT4_INODES_PER_GROUP(sb) - ino));
+ up_read(&grp->alloc_sem);
+ }
+ ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+ if (S_ISDIR(mode)) {
+ ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t f = ext4_flex_group(sbi, group);
+
+ atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+ }
+ }
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ ext4_unlock_group(sb, group);
+ }
+
+ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
if (err)
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
* where it is called from on active part of filesystem is ext4lazyinit
* thread, so we do not need any special locks, however we have to prevent
* inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
*/
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int barrier)
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
sbi->s_inodes_per_block);
if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
- ext4_error(sb, "Something is wrong with group %u\n"
- "Used itable blocks: %d"
- "itable unused count: %u\n",
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
group, used_blks,
ext4_itable_unused_count(sb, gdp));
ret = 1;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index feaa82fe629d..c77b0bd2c711 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -272,7 +272,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
trace_ext4_da_update_reserve_space(inode, used, quota_claim);
if (unlikely(used > ei->i_reserved_data_blocks)) {
ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
- "with only %d reserved data blocks\n",
+ "with only %d reserved data blocks",
__func__, inode->i_ino, used,
ei->i_reserved_data_blocks);
WARN_ON(1);
@@ -1165,7 +1165,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
*/
ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
"ino %lu, to_free %d with only %d reserved "
- "data blocks\n", inode->i_ino, to_free,
+ "data blocks", inode->i_ino, to_free,
ei->i_reserved_data_blocks);
WARN_ON(1);
to_free = ei->i_reserved_data_blocks;
@@ -1428,20 +1428,22 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
static void ext4_print_free_blocks(struct inode *inode)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- printk(KERN_CRIT "Total free blocks count %lld\n",
+ struct super_block *sb = inode->i_sb;
+
+ ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
EXT4_C2B(EXT4_SB(inode->i_sb),
ext4_count_free_clusters(inode->i_sb)));
- printk(KERN_CRIT "Free/Dirty block details\n");
- printk(KERN_CRIT "free_blocks=%lld\n",
+ ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
+ ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
percpu_counter_sum(&sbi->s_freeclusters_counter)));
- printk(KERN_CRIT "dirty_blocks=%lld\n",
+ ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
- printk(KERN_CRIT "Block reservation details\n");
- printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
- EXT4_I(inode)->i_reserved_data_blocks);
- printk(KERN_CRIT "i_reserved_meta_blocks=%u\n",
+ ext4_msg(sb, KERN_CRIT, "Block reservation details");
+ ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
+ EXT4_I(inode)->i_reserved_data_blocks);
+ ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
EXT4_I(inode)->i_reserved_meta_blocks);
return;
}
@@ -2482,13 +2484,14 @@ static int ext4_da_write_end(struct file *file,
int write_mode = (int)(unsigned long)fsdata;
if (write_mode == FALL_BACK_TO_NONDELALLOC) {
- if (ext4_should_order_data(inode)) {
+ switch (ext4_inode_journal_mode(inode)) {
+ case EXT4_INODE_ORDERED_DATA_MODE:
return ext4_ordered_write_end(file, mapping, pos,
len, copied, page, fsdata);
- } else if (ext4_should_writeback_data(inode)) {
+ case EXT4_INODE_WRITEBACK_DATA_MODE:
return ext4_writeback_write_end(file, mapping, pos,
len, copied, page, fsdata);
- } else {
+ default:
BUG();
}
}
@@ -2763,7 +2766,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p "
- "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
+ "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
iocb->private, io_end->inode->i_ino, iocb, offset,
size);
@@ -2795,9 +2798,6 @@ out:
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
-
- /* XXX: probably should move into the real I/O completion handler */
- inode_dio_done(inode);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -2811,8 +2811,9 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
goto out;
if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
- printk("sb umounted, discard end_io request for inode %lu\n",
- io_end->inode->i_ino);
+ ext4_msg(io_end->inode->i_sb, KERN_INFO,
+ "sb umounted, discard end_io request for inode %lu",
+ io_end->inode->i_ino);
ext4_free_io_end(io_end);
goto out;
}
@@ -2921,9 +2922,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
iocb->private = NULL;
EXT4_I(inode)->cur_aio_dio = NULL;
if (!is_sync_kiocb(iocb)) {
- iocb->private = ext4_init_io_end(inode, GFP_NOFS);
- if (!iocb->private)
+ ext4_io_end_t *io_end =
+ ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end)
return -ENOMEM;
+ io_end->flag |= EXT4_IO_END_DIRECT;
+ iocb->private = io_end;
/*
* we save the io structure for current async
* direct IO, so that later ext4_map_blocks()
@@ -2940,7 +2944,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ext4_get_block_write,
ext4_end_io_dio,
NULL,
- DIO_LOCKING | DIO_SKIP_HOLES);
+ DIO_LOCKING);
if (iocb->private)
EXT4_I(inode)->cur_aio_dio = NULL;
/*
@@ -3086,18 +3090,25 @@ static const struct address_space_operations ext4_da_aops = {
void ext4_set_aops(struct inode *inode)
{
- if (ext4_should_order_data(inode) &&
- test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else if (ext4_should_order_data(inode))
- inode->i_mapping->a_ops = &ext4_ordered_aops;
- else if (ext4_should_writeback_data(inode) &&
- test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else if (ext4_should_writeback_data(inode))
- inode->i_mapping->a_ops = &ext4_writeback_aops;
- else
+ switch (ext4_inode_journal_mode(inode)) {
+ case EXT4_INODE_ORDERED_DATA_MODE:
+ if (test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_da_aops;
+ else
+ inode->i_mapping->a_ops = &ext4_ordered_aops;
+ break;
+ case EXT4_INODE_WRITEBACK_DATA_MODE:
+ if (test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_da_aops;
+ else
+ inode->i_mapping->a_ops = &ext4_writeback_aops;
+ break;
+ case EXT4_INODE_JOURNAL_DATA_MODE:
inode->i_mapping->a_ops = &ext4_journalled_aops;
+ break;
+ default:
+ BUG();
+ }
}
@@ -3329,16 +3340,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
struct inode *inode = file->f_path.dentry->d_inode;
if (!S_ISREG(inode->i_mode))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
/* TODO: Add support for non extent hole punching */
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
/* TODO: Add support for bigalloc file systems */
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
return ext4_ext_punch_hole(file, offset, length);
@@ -3924,10 +3935,8 @@ static int ext4_do_update_inode(handle_t *handle,
ext4_update_dynamic_rev(sb);
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
- sb->s_dirt = 1;
ext4_handle_sync(handle);
- err = ext4_handle_dirty_metadata(handle, NULL,
- EXT4_SB(sb)->s_sbh);
+ err = ext4_handle_dirty_super(handle, sb);
}
}
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@@ -4152,11 +4161,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(inode)) {
+ if (attr->ia_size != i_size_read(inode))
truncate_setsize(inode, attr->ia_size);
- ext4_truncate(inode);
- } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
- ext4_truncate(inode);
+ ext4_truncate(inode);
}
if (!rc) {
@@ -4314,7 +4321,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
- if (test_opt(inode->i_sb, I_VERSION))
+ if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
/* the do_update_inode consumes one bh->b_count */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cb990b21c698..99ab428bcfa0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,6 +21,7 @@
* mballoc.c contains the multiblocks allocation routines
*/
+#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/debugfs.h>
#include <linux/slab.h>
@@ -339,7 +340,7 @@
*/
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
-static struct kmem_cache *ext4_free_ext_cachep;
+static struct kmem_cache *ext4_free_data_cachep;
/* We create slab caches for groupinfo data structures based on the
* superblock block size. There will be one per mounted filesystem for
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
+static void ext4_free_data_callback(struct super_block *sb,
+ struct ext4_journal_cb_entry *jce, int rc);
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
@@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
char *bb;
- BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
+ BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
BUG_ON(max == NULL);
if (order > e4b->bd_blkbits + 1) {
@@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
/* at order 0 we see each particular block */
if (order == 0) {
*max = 1 << (e4b->bd_blkbits + 3);
- return EXT4_MB_BITMAP(e4b);
+ return e4b->bd_bitmap;
}
- bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
+ bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
return bb;
@@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
for (j = 0; j < (1 << order); j++) {
k = (i * (1 << order)) + j;
MB_CHECK_ASSERT(
- !mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
+ !mb_test_bit(k, e4b->bd_bitmap));
}
count++;
}
@@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
int groups_per_page;
int err = 0;
int i;
- ext4_group_t first_group;
+ ext4_group_t first_group, group;
int first_block;
struct super_block *sb;
struct buffer_head *bhs;
@@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* allocate buffer_heads to read bitmaps */
if (groups_per_page > 1) {
- err = -ENOMEM;
i = sizeof(struct buffer_head *) * groups_per_page;
bh = kzalloc(i, GFP_NOFS);
- if (bh == NULL)
+ if (bh == NULL) {
+ err = -ENOMEM;
goto out;
+ }
} else
bh = &bhs;
first_group = page->index * blocks_per_page / 2;
/* read all groups the page covers into the cache */
- for (i = 0; i < groups_per_page; i++) {
- struct ext4_group_desc *desc;
-
- if (first_group + i >= ngroups)
+ for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+ if (group >= ngroups)
break;
- grinfo = ext4_get_group_info(sb, first_group + i);
+ grinfo = ext4_get_group_info(sb, group);
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
@@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
bh[i] = NULL;
continue;
}
-
- err = -EIO;
- desc = ext4_get_group_desc(sb, first_group + i, NULL);
- if (desc == NULL)
- goto out;
-
- err = -ENOMEM;
- bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
- if (bh[i] == NULL)
+ if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
+ err = -ENOMEM;
goto out;
-
- if (bitmap_uptodate(bh[i]))
- continue;
-
- lock_buffer(bh[i]);
- if (bitmap_uptodate(bh[i])) {
- unlock_buffer(bh[i]);
- continue;
- }
- ext4_lock_group(sb, first_group + i);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- ext4_init_block_bitmap(sb, bh[i],
- first_group + i, desc);
- set_bitmap_uptodate(bh[i]);
- set_buffer_uptodate(bh[i]);
- ext4_unlock_group(sb, first_group + i);
- unlock_buffer(bh[i]);
- continue;
}
- ext4_unlock_group(sb, first_group + i);
- if (buffer_uptodate(bh[i])) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh[i]);
- unlock_buffer(bh[i]);
- continue;
- }
- get_bh(bh[i]);
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh[i]);
- bh[i]->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh[i]);
- mb_debug(1, "read bitmap for group %u\n", first_group + i);
+ mb_debug(1, "read bitmap for group %u\n", group);
}
/* wait for I/O completion */
- for (i = 0; i < groups_per_page; i++)
- if (bh[i])
- wait_on_buffer(bh[i]);
-
- err = -EIO;
- for (i = 0; i < groups_per_page; i++)
- if (bh[i] && !buffer_uptodate(bh[i]))
+ for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+ if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
+ err = -EIO;
goto out;
+ }
+ }
- err = 0;
first_block = page->index * blocks_per_page;
for (i = 0; i < blocks_per_page; i++) {
int group;
@@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
int order = 1;
void *bb;
- BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
+ BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
- bb = EXT4_MB_BUDDY(e4b);
+ bb = e4b->bd_buddy;
while (order <= e4b->bd_blkbits + 1) {
block = block >> 1;
if (!mb_test_bit(block, bb)) {
@@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
/* let's maintain fragments counter */
if (first != 0)
- block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
+ block = !mb_test_bit(first - 1, e4b->bd_bitmap);
if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
- max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
+ max = !mb_test_bit(first + count, e4b->bd_bitmap);
if (block && max)
e4b->bd_info->bb_fragments--;
else if (!block && !max)
@@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
block = first++;
order = 0;
- if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
+ if (!mb_test_bit(block, e4b->bd_bitmap)) {
ext4_fsblk_t blocknr;
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
"freeing already freed block "
"(bit %u)", block);
}
- mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
+ mb_clear_bit(block, e4b->bd_bitmap);
e4b->bd_info->bb_counters[order]++;
/* start of the buddy */
@@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
break;
next = (block + 1) * (1 << order);
- if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
+ if (mb_test_bit(next, e4b->bd_bitmap))
break;
order = mb_find_order_for_block(e4b, next);
@@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
/* let's maintain fragments counter */
if (start != 0)
- mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
+ mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
- max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
+ max = !mb_test_bit(start + len, e4b->bd_bitmap);
if (mlen && max)
e4b->bd_info->bb_fragments++;
else if (!mlen && !max)
@@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
}
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
- ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+ ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
mb_check_buddy(e4b);
return ret;
@@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b)
{
struct super_block *sb = ac->ac_sb;
- void *bitmap = EXT4_MB_BITMAP(e4b);
+ void *bitmap = e4b->bd_bitmap;
struct ext4_free_extent ex;
int i;
int free;
@@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
{
struct super_block *sb = ac->ac_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- void *bitmap = EXT4_MB_BITMAP(e4b);
+ void *bitmap = e4b->bd_bitmap;
struct ext4_free_extent ex;
ext4_fsblk_t first_group_block;
ext4_fsblk_t a;
@@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
EXT4_DESC_PER_BLOCK_BITS(sb);
meta_group_info = kmalloc(metalen, GFP_KERNEL);
if (meta_group_info == NULL) {
- ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
+ ext4_msg(sb, KERN_ERR, "can't allocate mem "
"for a buddy group");
goto exit_meta_group_info;
}
@@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
- ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
+ ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
goto exit_group_info;
}
memset(meta_group_info[i], 0, kmem_cache_size(cachep));
@@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
&ext4_mb_seq_groups_fops, sb);
- if (sbi->s_journal)
- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
-
return 0;
out_free_locality_groups:
@@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
* This function is called by the jbd2 layer once the commit has finished,
* so we know we can free the blocks that were released with that commit.
*/
-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
+static void ext4_free_data_callback(struct super_block *sb,
+ struct ext4_journal_cb_entry *jce,
+ int rc)
{
- struct super_block *sb = journal->j_private;
+ struct ext4_free_data *entry = (struct ext4_free_data *)jce;
struct ext4_buddy e4b;
struct ext4_group_info *db;
int err, count = 0, count2 = 0;
- struct ext4_free_data *entry;
- struct list_head *l, *ltmp;
- list_for_each_safe(l, ltmp, &txn->t_private_list) {
- entry = list_entry(l, struct ext4_free_data, list);
+ mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+ entry->efd_count, entry->efd_group, entry);
- mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
- entry->count, entry->group, entry);
+ if (test_opt(sb, DISCARD))
+ ext4_issue_discard(sb, entry->efd_group,
+ entry->efd_start_cluster, entry->efd_count);
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, entry->group,
- entry->start_cluster, entry->count);
+ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
+ /* we expect to find existing buddy because it's pinned */
+ BUG_ON(err != 0);
- err = ext4_mb_load_buddy(sb, entry->group, &e4b);
- /* we expect to find existing buddy because it's pinned */
- BUG_ON(err != 0);
- db = e4b.bd_info;
- /* there are blocks to put in buddy to make them really free */
- count += entry->count;
- count2++;
- ext4_lock_group(sb, entry->group);
- /* Take it out of per group rb tree */
- rb_erase(&entry->node, &(db->bb_free_root));
- mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
+ db = e4b.bd_info;
+ /* there are blocks to put in buddy to make them really free */
+ count += entry->efd_count;
+ count2++;
+ ext4_lock_group(sb, entry->efd_group);
+ /* Take it out of per group rb tree */
+ rb_erase(&entry->efd_node, &(db->bb_free_root));
+ mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
- /*
- * Clear the trimmed flag for the group so that the next
- * ext4_trim_fs can trim it.
- * If the volume is mounted with -o discard, online discard
- * is supported and the free blocks will be trimmed online.
- */
- if (!test_opt(sb, DISCARD))
- EXT4_MB_GRP_CLEAR_TRIMMED(db);
+ /*
+ * Clear the trimmed flag for the group so that the next
+ * ext4_trim_fs can trim it.
+ * If the volume is mounted with -o discard, online discard
+ * is supported and the free blocks will be trimmed online.
+ */
+ if (!test_opt(sb, DISCARD))
+ EXT4_MB_GRP_CLEAR_TRIMMED(db);
- if (!db->bb_free_root.rb_node) {
- /* No more items in the per group rb tree
- * balance refcounts from ext4_mb_free_metadata()
- */
- page_cache_release(e4b.bd_buddy_page);
- page_cache_release(e4b.bd_bitmap_page);
- }
- ext4_unlock_group(sb, entry->group);
- kmem_cache_free(ext4_free_ext_cachep, entry);
- ext4_mb_unload_buddy(&e4b);
+ if (!db->bb_free_root.rb_node) {
+ /* No more items in the per group rb tree
+ * balance refcounts from ext4_mb_free_metadata()
+ */
+ page_cache_release(e4b.bd_buddy_page);
+ page_cache_release(e4b.bd_bitmap_page);
}
+ ext4_unlock_group(sb, entry->efd_group);
+ kmem_cache_free(ext4_free_data_cachep, entry);
+ ext4_mb_unload_buddy(&e4b);
mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
@@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void)
return -ENOMEM;
}
- ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
- SLAB_RECLAIM_ACCOUNT);
- if (ext4_free_ext_cachep == NULL) {
+ ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
+ SLAB_RECLAIM_ACCOUNT);
+ if (ext4_free_data_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
return -ENOMEM;
@@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void)
rcu_barrier();
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
- kmem_cache_destroy(ext4_free_ext_cachep);
+ kmem_cache_destroy(ext4_free_data_cachep);
ext4_groupinfo_destroy_slabs();
ext4_remove_debugfs_entry();
}
@@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
if (!ext4_data_block_valid(sbi, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
- "fs metadata\n", block, block+len);
+ "fs metadata", block, block+len);
/* File system mounted not to panic on error
* Fix the bitmap and repeat the block allocation
* We leak some of the blocks here.
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
int bsbits, max;
ext4_lblk_t end;
- loff_t size, orig_size, start_off;
+ loff_t size, start_off;
+ loff_t orig_size __maybe_unused;
ext4_lblk_t start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_prealloc_space *pa;
@@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
n = rb_first(&(grp->bb_free_root));
while (n) {
- entry = rb_entry(n, struct ext4_free_data, node);
- ext4_set_bits(bitmap, entry->start_cluster, entry->count);
+ entry = rb_entry(n, struct ext4_free_data, efd_node);
+ ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
n = rb_next(n);
}
return;
@@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
return;
- ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
+ ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
" Allocation context details:");
- ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
+ ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
ac->ac_status, ac->ac_flags);
- ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
+ ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
"goal %lu/%lu/%lu@%lu, "
"best %lu/%lu/%lu@%lu cr %d",
(unsigned long)ac->ac_o_ex.fe_group,
@@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
(unsigned long)ac->ac_b_ex.fe_len,
(unsigned long)ac->ac_b_ex.fe_logical,
(int)ac->ac_criteria);
- ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
+ ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
ac->ac_ex_scanned, ac->ac_found);
- ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");
+ ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
ngroups = ext4_get_groups_count(sb);
for (i = 0; i < ngroups; i++) {
struct ext4_group_info *grp = ext4_get_group_info(sb, i);
@@ -4428,9 +4376,9 @@ out:
static int can_merge(struct ext4_free_data *entry1,
struct ext4_free_data *entry2)
{
- if ((entry1->t_tid == entry2->t_tid) &&
- (entry1->group == entry2->group) &&
- ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
+ if ((entry1->efd_tid == entry2->efd_tid) &&
+ (entry1->efd_group == entry2->efd_group) &&
+ ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
return 1;
return 0;
}
@@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);
- new_node = &new_entry->node;
- cluster = new_entry->start_cluster;
+ new_node = &new_entry->efd_node;
+ cluster = new_entry->efd_start_cluster;
if (!*n) {
/* first free block exent. We need to
@@ -4466,10 +4414,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
}
while (*n) {
parent = *n;
- entry = rb_entry(parent, struct ext4_free_data, node);
- if (cluster < entry->start_cluster)
+ entry = rb_entry(parent, struct ext4_free_data, efd_node);
+ if (cluster < entry->efd_start_cluster)
n = &(*n)->rb_left;
- else if (cluster >= (entry->start_cluster + entry->count))
+ else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
n = &(*n)->rb_right;
else {
ext4_grp_locked_error(sb, group, 0,
@@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
/* Now try to see the extent can be merged to left and right */
node = rb_prev(new_node);
if (node) {
- entry = rb_entry(node, struct ext4_free_data, node);
+ entry = rb_entry(node, struct ext4_free_data, efd_node);
if (can_merge(entry, new_entry)) {
- new_entry->start_cluster = entry->start_cluster;
- new_entry->count += entry->count;
+ new_entry->efd_start_cluster = entry->efd_start_cluster;
+ new_entry->efd_count += entry->efd_count;
rb_erase(node, &(db->bb_free_root));
- spin_lock(&sbi->s_md_lock);
- list_del(&entry->list);
- spin_unlock(&sbi->s_md_lock);
- kmem_cache_free(ext4_free_ext_cachep, entry);
+ ext4_journal_callback_del(handle, &entry->efd_jce);
+ kmem_cache_free(ext4_free_data_cachep, entry);
}
}
node = rb_next(new_node);
if (node) {
- entry = rb_entry(node, struct ext4_free_data, node);
+ entry = rb_entry(node, struct ext4_free_data, efd_node);
if (can_merge(new_entry, entry)) {
- new_entry->count += entry->count;
+ new_entry->efd_count += entry->efd_count;
rb_erase(node, &(db->bb_free_root));
- spin_lock(&sbi->s_md_lock);
- list_del(&entry->list);
- spin_unlock(&sbi->s_md_lock);
- kmem_cache_free(ext4_free_ext_cachep, entry);
+ ext4_journal_callback_del(handle, &entry->efd_jce);
+ kmem_cache_free(ext4_free_data_cachep, entry);
}
}
/* Add the extent to transaction's private list */
- spin_lock(&sbi->s_md_lock);
- list_add(&new_entry->list, &handle->h_transaction->t_private_list);
- spin_unlock(&sbi->s_md_lock);
+ ext4_journal_callback_add(handle, ext4_free_data_callback,
+ &new_entry->efd_jce);
return 0;
}
@@ -4691,15 +4634,15 @@ do_more:
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
*/
- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
if (!new_entry) {
err = -ENOMEM;
goto error_return;
}
- new_entry->start_cluster = bit;
- new_entry->group = block_group;
- new_entry->count = count_clusters;
- new_entry->t_tid = handle->h_transaction->t_tid;
+ new_entry->efd_start_cluster = bit;
+ new_entry->efd_group = block_group;
+ new_entry->efd_count = count_clusters;
+ new_entry->efd_tid = handle->h_transaction->t_tid;
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
@@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
start = (e4b.bd_info->bb_first_free > start) ?
e4b.bd_info->bb_first_free : start;
- while (start < max) {
- start = mb_find_next_zero_bit(bitmap, max, start);
- if (start >= max)
+ while (start <= max) {
+ start = mb_find_next_zero_bit(bitmap, max + 1, start);
+ if (start > max)
break;
- next = mb_find_next_bit(bitmap, max, start);
+ next = mb_find_next_bit(bitmap, max + 1, start);
if ((next - start) >= minblocks) {
ext4_trim_extent(sb, start,
@@ -5027,37 +4970,36 @@ out:
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
struct ext4_group_info *grp;
- ext4_group_t first_group, last_group;
- ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+ ext4_group_t group, first_group, last_group;
ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
- uint64_t start, len, minlen, trimmed = 0;
+ uint64_t start, end, minlen, trimmed = 0;
ext4_fsblk_t first_data_blk =
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+ ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
int ret = 0;
start = range->start >> sb->s_blocksize_bits;
- len = range->len >> sb->s_blocksize_bits;
+ end = start + (range->len >> sb->s_blocksize_bits) - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
+ if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
+ unlikely(start >= max_blks))
return -EINVAL;
- if (start + len <= first_data_blk)
+ if (end >= max_blks)
+ end = max_blks - 1;
+ if (end <= first_data_blk)
goto out;
- if (start < first_data_blk) {
- len -= first_data_blk - start;
+ if (start < first_data_blk)
start = first_data_blk;
- }
- /* Determine first and last group to examine based on start and len */
+ /* Determine first and last group to examine based on start and end */
ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
&first_group, &first_cluster);
- ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
&last_group, &last_cluster);
- last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
- last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
- if (first_group > last_group)
- return -EINVAL;
+ /* end now represents the last cluster to discard in this group */
+ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
for (group = first_group; group <= last_group; group++) {
grp = ext4_get_group_info(sb, group);
@@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
}
/*
- * For all the groups except the last one, last block will
- * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
- * change it for the last group in which case start +
- * len < EXT4_BLOCKS_PER_GROUP(sb).
+ * For all the groups except the last one, last cluster will
+ * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
+ * change it for the last group, note that last_cluster is
+ * already computed earlier by ext4_get_group_no_and_offset()
*/
- if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
- last_cluster = first_cluster + len;
- len -= last_cluster - first_cluster;
+ if (group == last_group)
+ end = last_cluster;
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- last_cluster, minlen);
+ end, minlen);
if (cnt < 0) {
ret = cnt;
break;
}
+ trimmed += cnt;
}
- trimmed += cnt;
+
+ /*
+ * For every group except the first one, we are sure
+ * that the first cluster to discard will be cluster #0.
+ */
first_cluster = 0;
}
- range->len = trimmed * sb->s_blocksize;
if (!ret)
atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
out:
+ range->len = trimmed * sb->s_blocksize;
return ret;
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 47705f3285e3..c070618c21ce 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -96,21 +96,23 @@ extern u8 mb_enable_debug;
struct ext4_free_data {
- /* this links the free block information from group_info */
- struct rb_node node;
+ /* MUST be the first member */
+ struct ext4_journal_cb_entry efd_jce;
+
+ /* ext4_free_data private data starts from here */
- /* this links the free block information from ext4_sb_info */
- struct list_head list;
+ /* this links the free block information from group_info */
+ struct rb_node efd_node;
/* group which free block extent belongs */
- ext4_group_t group;
+ ext4_group_t efd_group;
/* free block extent */
- ext4_grpblk_t start_cluster;
- ext4_grpblk_t count;
+ ext4_grpblk_t efd_start_cluster;
+ ext4_grpblk_t efd_count;
/* transaction which freed this extent */
- tid_t t_tid;
+ tid_t efd_tid;
};
struct ext4_prealloc_space {
@@ -210,8 +212,6 @@ struct ext4_buddy {
__u16 bd_blkbits;
ext4_group_t bd_group;
};
-#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
-#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
struct ext4_free_extent *fex)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index e7d6bb0acfa6..f39f80f8f2c5 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -471,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode)
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
S_IFREG, NULL, goal, owner);
if (IS_ERR(tmp_inode)) {
- retval = PTR_ERR(inode);
+ retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
return retval;
}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 7ea4ba4eff2a..ed6548d89165 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -257,8 +257,8 @@ int ext4_multi_mount_protect(struct super_block *sb,
* If check_interval in MMP block is larger, use that instead of
* update_interval from the superblock.
*/
- if (mmp->mmp_check_interval > mmp_check_interval)
- mmp_check_interval = mmp->mmp_check_interval;
+ if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
+ mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
seq = le32_to_cpu(mmp->mmp_seq);
if (seq == EXT4_MMP_SEQ_CLEAN)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2043f482375d..349d7b3671c8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -468,7 +468,7 @@ fail2:
fail:
if (*err == ERR_BAD_DX_DIR)
ext4_warning(dir->i_sb,
- "Corrupt dir inode %ld, running e2fsck is "
+ "Corrupt dir inode %lu, running e2fsck is "
"recommended.", dir->i_ino);
return NULL;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 475851896518..dcdeef169a69 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -110,6 +110,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
+ if (io->flag & EXT4_IO_END_DIRECT)
+ inode_dio_done(inode);
/* Wake up anyone waiting on unwritten extent conversion */
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
wake_up_all(ext4_ioend_wq(io->inode));
@@ -127,12 +129,18 @@ static void ext4_end_io_work(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ if (io->flag & EXT4_IO_END_IN_FSYNC)
+ goto requeue;
if (list_empty(&io->list)) {
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
goto free;
}
if (!mutex_trylock(&inode->i_mutex)) {
+ bool was_queued;
+requeue:
+ was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
+ io->flag |= EXT4_IO_END_QUEUED;
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/*
* Requeue the work instead of waiting so that the work
@@ -145,9 +153,8 @@ static void ext4_end_io_work(struct work_struct *work)
* yield the cpu if it sees an end_io request that has already
* been requeued.
*/
- if (io->flag & EXT4_IO_END_QUEUED)
+ if (was_queued)
yield();
- io->flag |= EXT4_IO_END_QUEUED;
return;
}
list_del_init(&io->list);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f9d948f0eb86..59fa0be27251 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1163,8 +1163,11 @@ static void ext4_update_super(struct super_block *sb,
do_div(reserved_blocks, 100);
ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
+ ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
flex_gd->count);
+ le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
+ flex_gd->count);
/*
* We need to protect s_groups_count against other CPUs seeing
@@ -1465,6 +1468,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
}
ext4_blocks_count_set(es, o_blocks_count + add);
+ ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */
@@ -1512,16 +1516,17 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n",
- o_blocks_count, n_blocks_count);
+ ext4_msg(sb, KERN_DEBUG,
+ "extending last group from %llu to %llu blocks",
+ o_blocks_count, n_blocks_count);
if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
return 0;
if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
- printk(KERN_ERR "EXT4-fs: filesystem on %s:"
- " too large to resize to %llu blocks safely\n",
- sb->s_id, n_blocks_count);
+ ext4_msg(sb, KERN_ERR,
+ "filesystem too large to resize to %llu blocks safely",
+ n_blocks_count);
if (sizeof(sector_t) < 8)
ext4_warning(sb, "CONFIG_LBDAF not enabled");
return -EINVAL;
@@ -1582,7 +1587,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_fsblk_t o_blocks_count;
ext4_group_t o_group;
ext4_group_t n_group;
- ext4_grpblk_t offset;
+ ext4_grpblk_t offset, add;
unsigned long n_desc_blocks;
unsigned long o_desc_blocks;
unsigned long desc_blocks;
@@ -1591,8 +1596,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
- "upto %llu blocks\n", o_blocks_count, n_blocks_count);
+ ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu "
+ "to %llu blocks", o_blocks_count, n_blocks_count);
if (n_blocks_count < o_blocks_count) {
/* On-line shrinking not supported */
@@ -1605,7 +1610,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
return 0;
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
- ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
+ ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
EXT4_DESC_PER_BLOCK(sb);
@@ -1634,10 +1639,12 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
}
brelse(bh);
- if (offset != 0) {
- /* extend the last group */
- ext4_grpblk_t add;
- add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
+ /* extend the last group */
+ if (n_group == o_group)
+ add = n_blocks_count - o_blocks_count;
+ else
+ add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1);
+ if (add > 0) {
err = ext4_group_extend_no_check(sb, o_blocks_count, add);
if (err)
goto out;
@@ -1674,7 +1681,7 @@ out:
iput(resize_inode);
if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
- "upto %llu blocks\n", o_blocks_count, n_blocks_count);
+ ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
+ "upto %llu blocks", o_blocks_count, n_blocks_count);
return err;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 933900909ed0..ceebaf853beb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -62,6 +62,7 @@ static struct ext4_features *ext4_feat;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
+static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
@@ -375,7 +376,7 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
if (is_handle_aborted(handle))
return;
- printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
+ printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
caller, line, errstr, err_fn);
jbd2_journal_abort_handle(handle);
@@ -431,6 +432,22 @@ static int block_device_ejected(struct super_block *sb)
return bdi->dev == NULL;
}
+static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
+{
+ struct super_block *sb = journal->j_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int error = is_journal_aborted(journal);
+ struct ext4_journal_cb_entry *jce, *tmp;
+
+ spin_lock(&sbi->s_md_lock);
+ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
+ list_del_init(&jce->jce_list);
+ spin_unlock(&sbi->s_md_lock);
+ jce->jce_func(sb, jce, error);
+ spin_lock(&sbi->s_md_lock);
+ }
+ spin_unlock(&sbi->s_md_lock);
+}
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
@@ -498,11 +515,16 @@ void ext4_error_inode(struct inode *inode, const char *function,
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
- inode->i_sb->s_id, function, line, inode->i_ino);
if (block)
- printk(KERN_CONT "block %llu: ", block);
- printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: block %llu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, &vaf);
+ else
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
@@ -524,15 +546,21 @@ void ext4_error_file(struct file *file, const char *function,
path = d_path(&(file->f_path), pathname, sizeof(pathname));
if (IS_ERR(path))
path = "(unknown)";
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
- inode->i_sb->s_id, function, line, inode->i_ino);
- if (block)
- printk(KERN_CONT "block %llu: ", block);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
+ if (block)
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "block %llu: comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, path, &vaf);
+ else
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
@@ -808,9 +836,6 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->dio_unwritten_wq);
lock_super(sb);
- if (sb->s_dirt)
- ext4_commit_super(sb, 1);
-
if (sbi->s_journal) {
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@@ -827,9 +852,12 @@ static void ext4_put_super(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
- ext4_commit_super(sb, 1);
}
+ if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
+ ext4_commit_super(sb, 1);
+
if (sbi->s_proc) {
+ remove_proc_entry("options", sbi->s_proc);
remove_proc_entry(sb->s_id, ext4_proc_root);
}
kobject_del(&sbi->s_kobj);
@@ -990,180 +1018,6 @@ void ext4_clear_inode(struct inode *inode)
}
}
-static inline void ext4_show_quota_options(struct seq_file *seq,
- struct super_block *sb)
-{
-#if defined(CONFIG_QUOTA)
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (sbi->s_jquota_fmt) {
- char *fmtname = "";
-
- switch (sbi->s_jquota_fmt) {
- case QFMT_VFS_OLD:
- fmtname = "vfsold";
- break;
- case QFMT_VFS_V0:
- fmtname = "vfsv0";
- break;
- case QFMT_VFS_V1:
- fmtname = "vfsv1";
- break;
- }
- seq_printf(seq, ",jqfmt=%s", fmtname);
- }
-
- if (sbi->s_qf_names[USRQUOTA])
- seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
-
- if (sbi->s_qf_names[GRPQUOTA])
- seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
-
- if (test_opt(sb, USRQUOTA))
- seq_puts(seq, ",usrquota");
-
- if (test_opt(sb, GRPQUOTA))
- seq_puts(seq, ",grpquota");
-#endif
-}
-
-/*
- * Show an option if
- * - it's set to a non-default value OR
- * - if the per-sb default is different from the global default
- */
-static int ext4_show_options(struct seq_file *seq, struct dentry *root)
-{
- int def_errors;
- unsigned long def_mount_opts;
- struct super_block *sb = root->d_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
-
- def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
- def_errors = le16_to_cpu(es->s_errors);
-
- if (sbi->s_sb_block != 1)
- seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
- if (test_opt(sb, MINIX_DF))
- seq_puts(seq, ",minixdf");
- if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
- seq_puts(seq, ",grpid");
- if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
- seq_puts(seq, ",nogrpid");
- if (sbi->s_resuid != EXT4_DEF_RESUID ||
- le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
- seq_printf(seq, ",resuid=%u", sbi->s_resuid);
- }
- if (sbi->s_resgid != EXT4_DEF_RESGID ||
- le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
- seq_printf(seq, ",resgid=%u", sbi->s_resgid);
- }
- if (test_opt(sb, ERRORS_RO)) {
- if (def_errors == EXT4_ERRORS_PANIC ||
- def_errors == EXT4_ERRORS_CONTINUE) {
- seq_puts(seq, ",errors=remount-ro");
- }
- }
- if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
- seq_puts(seq, ",errors=continue");
- if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
- seq_puts(seq, ",errors=panic");
- if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
- seq_puts(seq, ",nouid32");
- if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
- seq_puts(seq, ",debug");
-#ifdef CONFIG_EXT4_FS_XATTR
- if (test_opt(sb, XATTR_USER))
- seq_puts(seq, ",user_xattr");
- if (!test_opt(sb, XATTR_USER))
- seq_puts(seq, ",nouser_xattr");
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
- seq_puts(seq, ",acl");
- if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
- seq_puts(seq, ",noacl");
-#endif
- if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
- seq_printf(seq, ",commit=%u",
- (unsigned) (sbi->s_commit_interval / HZ));
- }
- if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
- seq_printf(seq, ",min_batch_time=%u",
- (unsigned) sbi->s_min_batch_time);
- }
- if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
- seq_printf(seq, ",max_batch_time=%u",
- (unsigned) sbi->s_max_batch_time);
- }
-
- /*
- * We're changing the default of barrier mount option, so
- * let's always display its mount state so it's clear what its
- * status is.
- */
- seq_puts(seq, ",barrier=");
- seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
- if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
- seq_puts(seq, ",journal_async_commit");
- else if (test_opt(sb, JOURNAL_CHECKSUM))
- seq_puts(seq, ",journal_checksum");
- if (test_opt(sb, I_VERSION))
- seq_puts(seq, ",i_version");
- if (!test_opt(sb, DELALLOC) &&
- !(def_mount_opts & EXT4_DEFM_NODELALLOC))
- seq_puts(seq, ",nodelalloc");
-
- if (!test_opt(sb, MBLK_IO_SUBMIT))
- seq_puts(seq, ",nomblk_io_submit");
- if (sbi->s_stripe)
- seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
- /*
- * journal mode get enabled in different ways
- * So just print the value even if we didn't specify it
- */
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- seq_puts(seq, ",data=journal");
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- seq_puts(seq, ",data=ordered");
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
- seq_puts(seq, ",data=writeback");
-
- if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
- seq_printf(seq, ",inode_readahead_blks=%u",
- sbi->s_inode_readahead_blks);
-
- if (test_opt(sb, DATA_ERR_ABORT))
- seq_puts(seq, ",data_err=abort");
-
- if (test_opt(sb, NO_AUTO_DA_ALLOC))
- seq_puts(seq, ",noauto_da_alloc");
-
- if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
- seq_puts(seq, ",discard");
-
- if (test_opt(sb, NOLOAD))
- seq_puts(seq, ",norecovery");
-
- if (test_opt(sb, DIOREAD_NOLOCK))
- seq_puts(seq, ",dioread_nolock");
-
- if (test_opt(sb, BLOCK_VALIDITY) &&
- !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
- seq_puts(seq, ",block_validity");
-
- if (!test_opt(sb, INIT_INODE_TABLE))
- seq_puts(seq, ",noinit_itable");
- else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
- seq_printf(seq, ",init_itable=%u",
- (unsigned) sbi->s_li_wait_mult);
-
- ext4_show_quota_options(seq, sb);
-
- return 0;
-}
-
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
@@ -1316,18 +1170,17 @@ static const struct export_operations ext4_export_ops = {
enum {
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
+ Opt_nouid32, Opt_debug, Opt_removed,
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
- Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
+ Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
- Opt_journal_update, Opt_journal_dev,
- Opt_journal_checksum, Opt_journal_async_commit,
+ Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
- Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
+ Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
+ Opt_usrquota, Opt_grpquota, Opt_i_version,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
@@ -1350,20 +1203,19 @@ static const match_table_t tokens = {
{Opt_err_ro, "errors=remount-ro"},
{Opt_nouid32, "nouid32"},
{Opt_debug, "debug"},
- {Opt_oldalloc, "oldalloc"},
- {Opt_orlov, "orlov"},
+ {Opt_removed, "oldalloc"},
+ {Opt_removed, "orlov"},
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
- {Opt_noload, "noload"},
{Opt_noload, "norecovery"},
- {Opt_nobh, "nobh"},
- {Opt_bh, "bh"},
+ {Opt_noload, "noload"},
+ {Opt_removed, "nobh"},
+ {Opt_removed, "bh"},
{Opt_commit, "commit=%u"},
{Opt_min_batch_time, "min_batch_time=%u"},
{Opt_max_batch_time, "max_batch_time=%u"},
- {Opt_journal_update, "journal=update"},
{Opt_journal_dev, "journal_dev=%u"},
{Opt_journal_checksum, "journal_checksum"},
{Opt_journal_async_commit, "journal_async_commit"},
@@ -1389,7 +1241,6 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
{Opt_stripe, "stripe=%u"},
- {Opt_resize, "resize"},
{Opt_delalloc, "delalloc"},
{Opt_nodelalloc, "nodelalloc"},
{Opt_mblk_io_submit, "mblk_io_submit"},
@@ -1408,6 +1259,11 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
+ {Opt_removed, "check=none"}, /* mount option from ext2/3 */
+ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
+ {Opt_removed, "reservation"}, /* mount option from ext2/3 */
+ {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
+ {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
{Opt_err, NULL},
};
@@ -1496,420 +1352,273 @@ static int clear_qf_name(struct super_block *sb, int qtype)
}
#endif
-static int parse_options(char *options, struct super_block *sb,
- unsigned long *journal_devnum,
- unsigned int *journal_ioprio,
- ext4_fsblk_t *n_blocks_count, int is_remount)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int data_opt = 0;
- int option;
+#define MOPT_SET 0x0001
+#define MOPT_CLEAR 0x0002
+#define MOPT_NOSUPPORT 0x0004
+#define MOPT_EXPLICIT 0x0008
+#define MOPT_CLEAR_ERR 0x0010
+#define MOPT_GTE0 0x0020
#ifdef CONFIG_QUOTA
- int qfmt;
+#define MOPT_Q 0
+#define MOPT_QFMT 0x0040
+#else
+#define MOPT_Q MOPT_NOSUPPORT
+#define MOPT_QFMT MOPT_NOSUPPORT
#endif
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
-
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = NULL;
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_bsd_df:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sb, MINIX_DF);
- break;
- case Opt_minix_df:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sb, MINIX_DF);
-
- break;
- case Opt_grpid:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sb, GRPID);
-
- break;
- case Opt_nogrpid:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sb, GRPID);
-
- break;
- case Opt_resuid:
- if (match_int(&args[0], &option))
- return 0;
- sbi->s_resuid = option;
- break;
- case Opt_resgid:
- if (match_int(&args[0], &option))
- return 0;
- sbi->s_resgid = option;
- break;
- case Opt_sb:
- /* handled by get_sb_block() instead of here */
- /* *sb_block = match_int(&args[0]); */
- break;
- case Opt_err_panic:
- clear_opt(sb, ERRORS_CONT);
- clear_opt(sb, ERRORS_RO);
- set_opt(sb, ERRORS_PANIC);
- break;
- case Opt_err_ro:
- clear_opt(sb, ERRORS_CONT);
- clear_opt(sb, ERRORS_PANIC);
- set_opt(sb, ERRORS_RO);
- break;
- case Opt_err_cont:
- clear_opt(sb, ERRORS_RO);
- clear_opt(sb, ERRORS_PANIC);
- set_opt(sb, ERRORS_CONT);
- break;
- case Opt_nouid32:
- set_opt(sb, NO_UID32);
- break;
- case Opt_debug:
- set_opt(sb, DEBUG);
- break;
- case Opt_oldalloc:
- ext4_msg(sb, KERN_WARNING,
- "Ignoring deprecated oldalloc option");
- break;
- case Opt_orlov:
- ext4_msg(sb, KERN_WARNING,
- "Ignoring deprecated orlov option");
- break;
+#define MOPT_DATAJ 0x0080
+
+static const struct mount_opts {
+ int token;
+ int mount_opt;
+ int flags;
+} ext4_mount_opts[] = {
+ {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
+ {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
+ {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
+ {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
+ {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
+ {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
+ {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
+ {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
+ {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
+ {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR},
+ {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
+ {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
+ {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT},
+ {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT},
+ {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET},
+ {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
+ EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET},
+ {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET},
+ {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
+ {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
+ {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
+ {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET},
+ {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR},
+ {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
+ {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
+ {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
+ {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
+ {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
+ {Opt_commit, 0, MOPT_GTE0},
+ {Opt_max_batch_time, 0, MOPT_GTE0},
+ {Opt_min_batch_time, 0, MOPT_GTE0},
+ {Opt_inode_readahead_blks, 0, MOPT_GTE0},
+ {Opt_init_itable, 0, MOPT_GTE0},
+ {Opt_stripe, 0, MOPT_GTE0},
+ {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
+ {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
+ {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
#ifdef CONFIG_EXT4_FS_XATTR
- case Opt_user_xattr:
- set_opt(sb, XATTR_USER);
- break;
- case Opt_nouser_xattr:
- clear_opt(sb, XATTR_USER);
- break;
+ {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
+ {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#else
- case Opt_user_xattr:
- case Opt_nouser_xattr:
- ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
- break;
+ {Opt_user_xattr, 0, MOPT_NOSUPPORT},
+ {Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
- case Opt_acl:
- set_opt(sb, POSIX_ACL);
- break;
- case Opt_noacl:
- clear_opt(sb, POSIX_ACL);
- break;
+ {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
+ {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
- case Opt_acl:
- case Opt_noacl:
- ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
- break;
+ {Opt_acl, 0, MOPT_NOSUPPORT},
+ {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
- case Opt_journal_update:
- /* @@@ FIXME */
- /* Eventually we will want to be able to create
- a journal file here. For now, only allow the
- user to specify an existing inode to be the
- journal file. */
- if (is_remount) {
- ext4_msg(sb, KERN_ERR,
- "Cannot specify journal on remount");
- return 0;
- }
- set_opt(sb, UPDATE_JOURNAL);
- break;
- case Opt_journal_dev:
- if (is_remount) {
+ {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
+ {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
+ {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
+ {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
+ MOPT_SET | MOPT_Q},
+ {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
+ MOPT_SET | MOPT_Q},
+ {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
+ EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q},
+ {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_offusrjquota, 0, MOPT_Q},
+ {Opt_offgrpjquota, 0, MOPT_Q},
+ {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
+ {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
+ {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+ {Opt_err, 0, 0}
+};
+
+static int handle_mount_opt(struct super_block *sb, char *opt, int token,
+ substring_t *args, unsigned long *journal_devnum,
+ unsigned int *journal_ioprio, int is_remount)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ const struct mount_opts *m;
+ int arg = 0;
+
+ if (args->from && match_int(args, &arg))
+ return -1;
+ switch (token) {
+ case Opt_noacl:
+ case Opt_nouser_xattr:
+ ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
+ break;
+ case Opt_sb:
+ return 1; /* handled by get_sb_block() */
+ case Opt_removed:
+ ext4_msg(sb, KERN_WARNING,
+ "Ignoring removed %s option", opt);
+ return 1;
+ case Opt_resuid:
+ sbi->s_resuid = arg;
+ return 1;
+ case Opt_resgid:
+ sbi->s_resgid = arg;
+ return 1;
+ case Opt_abort:
+ sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ return 1;
+ case Opt_i_version:
+ sb->s_flags |= MS_I_VERSION;
+ return 1;
+ case Opt_journal_dev:
+ if (is_remount) {
+ ext4_msg(sb, KERN_ERR,
+ "Cannot specify journal on remount");
+ return -1;
+ }
+ *journal_devnum = arg;
+ return 1;
+ case Opt_journal_ioprio:
+ if (arg < 0 || arg > 7)
+ return -1;
+ *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
+ return 1;
+ }
+
+ for (m = ext4_mount_opts; m->token != Opt_err; m++) {
+ if (token != m->token)
+ continue;
+ if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
+ return -1;
+ if (m->flags & MOPT_EXPLICIT)
+ set_opt2(sb, EXPLICIT_DELALLOC);
+ if (m->flags & MOPT_CLEAR_ERR)
+ clear_opt(sb, ERRORS_MASK);
+ if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
+ ext4_msg(sb, KERN_ERR, "Cannot change quota "
+ "options when quota turned on");
+ return -1;
+ }
+
+ if (m->flags & MOPT_NOSUPPORT) {
+ ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
+ } else if (token == Opt_commit) {
+ if (arg == 0)
+ arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
+ sbi->s_commit_interval = HZ * arg;
+ } else if (token == Opt_max_batch_time) {
+ if (arg == 0)
+ arg = EXT4_DEF_MAX_BATCH_TIME;
+ sbi->s_max_batch_time = arg;
+ } else if (token == Opt_min_batch_time) {
+ sbi->s_min_batch_time = arg;
+ } else if (token == Opt_inode_readahead_blks) {
+ if (arg > (1 << 30))
+ return -1;
+ if (arg && !is_power_of_2(arg)) {
ext4_msg(sb, KERN_ERR,
- "Cannot specify journal on remount");
- return 0;
+ "EXT4-fs: inode_readahead_blks"
+ " must be a power of 2");
+ return -1;
}
- if (match_int(&args[0], &option))
- return 0;
- *journal_devnum = option;
- break;
- case Opt_journal_checksum:
- set_opt(sb, JOURNAL_CHECKSUM);
- break;
- case Opt_journal_async_commit:
- set_opt(sb, JOURNAL_ASYNC_COMMIT);
- set_opt(sb, JOURNAL_CHECKSUM);
- break;
- case Opt_noload:
- set_opt(sb, NOLOAD);
- break;
- case Opt_commit:
- if (match_int(&args[0], &option))
- return 0;
- if (option < 0)
- return 0;
- if (option == 0)
- option = JBD2_DEFAULT_MAX_COMMIT_AGE;
- sbi->s_commit_interval = HZ * option;
- break;
- case Opt_max_batch_time:
- if (match_int(&args[0], &option))
- return 0;
- if (option < 0)
- return 0;
- if (option == 0)
- option = EXT4_DEF_MAX_BATCH_TIME;
- sbi->s_max_batch_time = option;
- break;
- case Opt_min_batch_time:
- if (match_int(&args[0], &option))
- return 0;
- if (option < 0)
- return 0;
- sbi->s_min_batch_time = option;
- break;
- case Opt_data_journal:
- data_opt = EXT4_MOUNT_JOURNAL_DATA;
- goto datacheck;
- case Opt_data_ordered:
- data_opt = EXT4_MOUNT_ORDERED_DATA;
- goto datacheck;
- case Opt_data_writeback:
- data_opt = EXT4_MOUNT_WRITEBACK_DATA;
- datacheck:
+ sbi->s_inode_readahead_blks = arg;
+ } else if (token == Opt_init_itable) {
+ set_opt(sb, INIT_INODE_TABLE);
+ if (!args->from)
+ arg = EXT4_DEF_LI_WAIT_MULT;
+ sbi->s_li_wait_mult = arg;
+ } else if (token == Opt_stripe) {
+ sbi->s_stripe = arg;
+ } else if (m->flags & MOPT_DATAJ) {
if (is_remount) {
if (!sbi->s_journal)
ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
- else if (test_opt(sb, DATA_FLAGS) != data_opt) {
+ else if (test_opt(sb, DATA_FLAGS) !=
+ m->mount_opt) {
ext4_msg(sb, KERN_ERR,
- "Cannot change data mode on remount");
- return 0;
+ "Cannot change data mode on remount");
+ return -1;
}
} else {
clear_opt(sb, DATA_FLAGS);
- sbi->s_mount_opt |= data_opt;
+ sbi->s_mount_opt |= m->mount_opt;
}
- break;
- case Opt_data_err_abort:
- set_opt(sb, DATA_ERR_ABORT);
- break;
- case Opt_data_err_ignore:
- clear_opt(sb, DATA_ERR_ABORT);
- break;
#ifdef CONFIG_QUOTA
- case Opt_usrjquota:
+ } else if (token == Opt_usrjquota) {
if (!set_qf_name(sb, USRQUOTA, &args[0]))
- return 0;
- break;
- case Opt_grpjquota:
+ return -1;
+ } else if (token == Opt_grpjquota) {
if (!set_qf_name(sb, GRPQUOTA, &args[0]))
- return 0;
- break;
- case Opt_offusrjquota:
+ return -1;
+ } else if (token == Opt_offusrjquota) {
if (!clear_qf_name(sb, USRQUOTA))
- return 0;
- break;
- case Opt_offgrpjquota:
+ return -1;
+ } else if (token == Opt_offgrpjquota) {
if (!clear_qf_name(sb, GRPQUOTA))
- return 0;
- break;
-
- case Opt_jqfmt_vfsold:
- qfmt = QFMT_VFS_OLD;
- goto set_qf_format;
- case Opt_jqfmt_vfsv0:
- qfmt = QFMT_VFS_V0;
- goto set_qf_format;
- case Opt_jqfmt_vfsv1:
- qfmt = QFMT_VFS_V1;
-set_qf_format:
+ return -1;
+ } else if (m->flags & MOPT_QFMT) {
if (sb_any_quota_loaded(sb) &&
- sbi->s_jquota_fmt != qfmt) {
- ext4_msg(sb, KERN_ERR, "Cannot change "
- "journaled quota options when "
- "quota turned on");
- return 0;
- }
- sbi->s_jquota_fmt = qfmt;
- break;
- case Opt_quota:
- case Opt_usrquota:
- set_opt(sb, QUOTA);
- set_opt(sb, USRQUOTA);
- break;
- case Opt_grpquota:
- set_opt(sb, QUOTA);
- set_opt(sb, GRPQUOTA);
- break;
- case Opt_noquota:
- if (sb_any_quota_loaded(sb)) {
- ext4_msg(sb, KERN_ERR, "Cannot change quota "
- "options when quota turned on");
- return 0;
+ sbi->s_jquota_fmt != m->mount_opt) {
+ ext4_msg(sb, KERN_ERR, "Cannot "
+ "change journaled quota options "
+ "when quota turned on");
+ return -1;
}
- clear_opt(sb, QUOTA);
- clear_opt(sb, USRQUOTA);
- clear_opt(sb, GRPQUOTA);
- break;
-#else
- case Opt_quota:
- case Opt_usrquota:
- case Opt_grpquota:
- ext4_msg(sb, KERN_ERR,
- "quota options not supported");
- break;
- case Opt_usrjquota:
- case Opt_grpjquota:
- case Opt_offusrjquota:
- case Opt_offgrpjquota:
- case Opt_jqfmt_vfsold:
- case Opt_jqfmt_vfsv0:
- case Opt_jqfmt_vfsv1:
- ext4_msg(sb, KERN_ERR,
- "journaled quota options not supported");
- break;
- case Opt_noquota:
- break;
+ sbi->s_jquota_fmt = m->mount_opt;
#endif
- case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
- break;
- case Opt_nobarrier:
- clear_opt(sb, BARRIER);
- break;
- case Opt_barrier:
- if (args[0].from) {
- if (match_int(&args[0], &option))
- return 0;
- } else
- option = 1; /* No argument, default to 1 */
- if (option)
- set_opt(sb, BARRIER);
- else
- clear_opt(sb, BARRIER);
- break;
- case Opt_ignore:
- break;
- case Opt_resize:
- if (!is_remount) {
- ext4_msg(sb, KERN_ERR,
- "resize option only available "
- "for remount");
- return 0;
- }
- if (match_int(&args[0], &option) != 0)
- return 0;
- *n_blocks_count = option;
- break;
- case Opt_nobh:
- ext4_msg(sb, KERN_WARNING,
- "Ignoring deprecated nobh option");
- break;
- case Opt_bh:
- ext4_msg(sb, KERN_WARNING,
- "Ignoring deprecated bh option");
- break;
- case Opt_i_version:
- set_opt(sb, I_VERSION);
- sb->s_flags |= MS_I_VERSION;
- break;
- case Opt_nodelalloc:
- clear_opt(sb, DELALLOC);
- clear_opt2(sb, EXPLICIT_DELALLOC);
- break;
- case Opt_mblk_io_submit:
- set_opt(sb, MBLK_IO_SUBMIT);
- break;
- case Opt_nomblk_io_submit:
- clear_opt(sb, MBLK_IO_SUBMIT);
- break;
- case Opt_stripe:
- if (match_int(&args[0], &option))
- return 0;
- if (option < 0)
- return 0;
- sbi->s_stripe = option;
- break;
- case Opt_delalloc:
- set_opt(sb, DELALLOC);
- set_opt2(sb, EXPLICIT_DELALLOC);
- break;
- case Opt_block_validity:
- set_opt(sb, BLOCK_VALIDITY);
- break;
- case Opt_noblock_validity:
- clear_opt(sb, BLOCK_VALIDITY);
- break;
- case Opt_inode_readahead_blks:
- if (match_int(&args[0], &option))
- return 0;
- if (option < 0 || option > (1 << 30))
- return 0;
- if (option && !is_power_of_2(option)) {
- ext4_msg(sb, KERN_ERR,
- "EXT4-fs: inode_readahead_blks"
- " must be a power of 2");
- return 0;
+ } else {
+ if (!args->from)
+ arg = 1;
+ if (m->flags & MOPT_CLEAR)
+ arg = !arg;
+ else if (unlikely(!(m->flags & MOPT_SET))) {
+ ext4_msg(sb, KERN_WARNING,
+ "buggy handling of option %s", opt);
+ WARN_ON(1);
+ return -1;
}
- sbi->s_inode_readahead_blks = option;
- break;
- case Opt_journal_ioprio:
- if (match_int(&args[0], &option))
- return 0;
- if (option < 0 || option > 7)
- break;
- *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
- option);
- break;
- case Opt_noauto_da_alloc:
- set_opt(sb, NO_AUTO_DA_ALLOC);
- break;
- case Opt_auto_da_alloc:
- if (args[0].from) {
- if (match_int(&args[0], &option))
- return 0;
- } else
- option = 1; /* No argument, default to 1 */
- if (option)
- clear_opt(sb, NO_AUTO_DA_ALLOC);
+ if (arg != 0)
+ sbi->s_mount_opt |= m->mount_opt;
else
- set_opt(sb,NO_AUTO_DA_ALLOC);
- break;
- case Opt_discard:
- set_opt(sb, DISCARD);
- break;
- case Opt_nodiscard:
- clear_opt(sb, DISCARD);
- break;
- case Opt_dioread_nolock:
- set_opt(sb, DIOREAD_NOLOCK);
- break;
- case Opt_dioread_lock:
- clear_opt(sb, DIOREAD_NOLOCK);
- break;
- case Opt_init_itable:
- set_opt(sb, INIT_INODE_TABLE);
- if (args[0].from) {
- if (match_int(&args[0], &option))
- return 0;
- } else
- option = EXT4_DEF_LI_WAIT_MULT;
- if (option < 0)
- return 0;
- sbi->s_li_wait_mult = option;
- break;
- case Opt_noinit_itable:
- clear_opt(sb, INIT_INODE_TABLE);
- break;
- default:
- ext4_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" "
- "or missing value", p);
- return 0;
+ sbi->s_mount_opt &= ~m->mount_opt;
}
+ return 1;
+ }
+ ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
+ "or missing value", opt);
+ return -1;
+}
+
+static int parse_options(char *options, struct super_block *sb,
+ unsigned long *journal_devnum,
+ unsigned int *journal_ioprio,
+ int is_remount)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int token;
+
+ if (!options)
+ return 1;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ if (!*p)
+ continue;
+ /*
+ * Initialize args struct so we know whether arg was
+ * found; some options take optional arguments.
+ */
+ args[0].to = args[0].from = 0;
+ token = match_token(p, tokens, args);
+ if (handle_mount_opt(sb, p, token, args, journal_devnum,
+ journal_ioprio, is_remount) < 0)
+ return 0;
}
#ifdef CONFIG_QUOTA
if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
@@ -1942,6 +1651,160 @@ set_qf_format:
return 1;
}
+static inline void ext4_show_quota_options(struct seq_file *seq,
+ struct super_block *sb)
+{
+#if defined(CONFIG_QUOTA)
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (sbi->s_jquota_fmt) {
+ char *fmtname = "";
+
+ switch (sbi->s_jquota_fmt) {
+ case QFMT_VFS_OLD:
+ fmtname = "vfsold";
+ break;
+ case QFMT_VFS_V0:
+ fmtname = "vfsv0";
+ break;
+ case QFMT_VFS_V1:
+ fmtname = "vfsv1";
+ break;
+ }
+ seq_printf(seq, ",jqfmt=%s", fmtname);
+ }
+
+ if (sbi->s_qf_names[USRQUOTA])
+ seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+
+ if (sbi->s_qf_names[GRPQUOTA])
+ seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+
+ if (test_opt(sb, USRQUOTA))
+ seq_puts(seq, ",usrquota");
+
+ if (test_opt(sb, GRPQUOTA))
+ seq_puts(seq, ",grpquota");
+#endif
+}
+
+static const char *token2str(int token)
+{
+ static const struct match_token *t;
+
+ for (t = tokens; t->token != Opt_err; t++)
+ if (t->token == token && !strchr(t->pattern, '='))
+ break;
+ return t->pattern;
+}
+
+/*
+ * Show an option if
+ * - it's set to a non-default value OR
+ * - if the per-sb default is different from the global default
+ */
+static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
+ int nodefs)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+ const struct mount_opts *m;
+ char sep = nodefs ? '\n' : ',';
+
+#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
+#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
+
+ if (sbi->s_sb_block != 1)
+ SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
+
+ for (m = ext4_mount_opts; m->token != Opt_err; m++) {
+ int want_set = m->flags & MOPT_SET;
+ if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
+ (m->flags & MOPT_CLEAR_ERR))
+ continue;
+ if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
+ continue; /* skip if same as the default */
+ if ((want_set &&
+ (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
+ (!want_set && (sbi->s_mount_opt & m->mount_opt)))
+ continue; /* select Opt_noFoo vs Opt_Foo */
+ SEQ_OPTS_PRINT("%s", token2str(m->token));
+ }
+
+ if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID ||
+ le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+ SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid);
+ if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID ||
+ le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+ SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid);
+ def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
+ if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
+ SEQ_OPTS_PUTS("errors=remount-ro");
+ if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
+ SEQ_OPTS_PUTS("errors=continue");
+ if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
+ SEQ_OPTS_PUTS("errors=panic");
+ if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
+ SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
+ if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
+ SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
+ if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
+ SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
+ if (sb->s_flags & MS_I_VERSION)
+ SEQ_OPTS_PUTS("i_version");
+ if (nodefs || sbi->s_stripe)
+ SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
+ if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ SEQ_OPTS_PUTS("data=journal");
+ else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ SEQ_OPTS_PUTS("data=ordered");
+ else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+ SEQ_OPTS_PUTS("data=writeback");
+ }
+ if (nodefs ||
+ sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
+ SEQ_OPTS_PRINT("inode_readahead_blks=%u",
+ sbi->s_inode_readahead_blks);
+
+ if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+ (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
+ SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+
+ ext4_show_quota_options(seq, sb);
+ return 0;
+}
+
+static int ext4_show_options(struct seq_file *seq, struct dentry *root)
+{
+ return _ext4_show_options(seq, root->d_sb, 0);
+}
+
+static int options_seq_show(struct seq_file *seq, void *offset)
+{
+ struct super_block *sb = seq->private;
+ int rc;
+
+ seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
+ rc = _ext4_show_options(seq, sb, 1);
+ seq_puts(seq, "\n");
+ return rc;
+}
+
+static int options_open_fs(struct inode *inode, struct file *file)
+{
+ return single_open(file, options_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations ext4_seq_options_fops = {
+ .owner = THIS_MODULE,
+ .open = options_open_fs,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
int read_only)
{
@@ -2945,7 +2808,7 @@ static int ext4_run_lazyinit_thread(void)
ext4_clear_request_list();
kfree(ext4_li_info);
ext4_li_info = NULL;
- printk(KERN_CRIT "EXT4: error %d creating inode table "
+ printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
"initialization thread\n",
err);
return err;
@@ -3183,11 +3046,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sb, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
set_opt(sb, DEBUG);
- if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
- ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
- "2.6.38");
+ if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
set_opt(sb, GRPID);
- }
if (def_mount_opts & EXT4_DEFM_UID16)
set_opt(sb, NO_UID32);
/* xattr user namespace & acls are now defaulted on */
@@ -3240,13 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
- &journal_devnum, &journal_ioprio, NULL, 0)) {
+ &journal_devnum, &journal_ioprio, 0)) {
ext4_msg(sb, KERN_WARNING,
"failed to parse options in superblock: %s",
sbi->s_es->s_mount_opts);
}
+ sbi->s_def_mount_opt = sbi->s_mount_opt;
if (!parse_options((char *) data, sb, &journal_devnum,
- &journal_ioprio, NULL, 0))
+ &journal_ioprio, 0))
goto failed_mount;
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
@@ -3416,7 +3277,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#else
es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
- sb->s_dirt = 1;
}
/* Handle clustersize */
@@ -3540,6 +3400,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
+ if (sbi->s_proc)
+ proc_create_data("options", S_IRUGO, sbi->s_proc,
+ &ext4_seq_options_fops, sb);
+
bgl_lock_init(sbi->s_blockgroup_lock);
for (i = 0; i < db_count; i++) {
@@ -3694,6 +3558,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
+
/*
* The journal may have updated the bg summary counts, so we
* need to update the global counters.
@@ -3861,6 +3727,7 @@ failed_mount2:
ext4_kvfree(sbi->s_group_desc);
failed_mount:
if (sbi->s_proc) {
+ remove_proc_entry("options", sbi->s_proc);
remove_proc_entry(sb->s_id, ext4_proc_root);
}
#ifdef CONFIG_QUOTA
@@ -4090,15 +3957,6 @@ static int ext4_load_journal(struct super_block *sb,
if (!(journal->j_flags & JBD2_BARRIER))
ext4_msg(sb, KERN_INFO, "barriers disabled");
- if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
- err = jbd2_journal_update_format(journal);
- if (err) {
- ext4_msg(sb, KERN_ERR, "error updating journal");
- jbd2_journal_destroy(journal);
- return err;
- }
- }
-
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) {
@@ -4385,7 +4243,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
int enable_quota = 0;
@@ -4418,8 +4275,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
/*
* Allow the "check" option to be passed as a remount option.
*/
- if (!parse_options(data, sb, NULL, &journal_ioprio,
- &n_blocks_count, 1)) {
+ if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
err = -EINVAL;
goto restore_opts;
}
@@ -4437,8 +4293,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
- n_blocks_count > ext4_blocks_count(es)) {
+ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
goto restore_opts;
@@ -4513,8 +4368,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal)
ext4_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
- if ((err = ext4_group_extend(sb, es, n_blocks_count)))
- goto restore_opts;
if (!ext4_setup_super(sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93a00d89a220..e88748e55c0f 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -82,8 +82,8 @@
printk("\n"); \
} while (0)
#else
-# define ea_idebug(f...)
-# define ea_bdebug(f...)
+# define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
+# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
static void ext4_xattr_cache_insert(struct buffer_head *);
@@ -158,13 +158,10 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
static inline int
ext4_xattr_check_block(struct buffer_head *bh)
{
- int error;
-
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1))
return -EIO;
- error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
- return error;
+ return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
}
static inline int
@@ -220,7 +217,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ENODATA;
if (!EXT4_I(inode)->i_file_acl)
goto cleanup;
- ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+ ea_idebug(inode, "reading block %llu",
+ (unsigned long long)EXT4_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh)
goto cleanup;
@@ -363,7 +361,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
error = 0;
if (!EXT4_I(inode)->i_file_acl)
goto cleanup;
- ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+ ea_idebug(inode, "reading block %llu",
+ (unsigned long long)EXT4_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
@@ -487,18 +486,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
+ unlock_buffer(bh);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+ if (ce)
+ mb_cache_entry_release(ce);
+ unlock_buffer(bh);
error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, 1);
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
- if (ce)
- mb_cache_entry_release(ce);
}
- unlock_buffer(bh);
out:
ext4_std_error(inode->i_sb, error);
return;
@@ -834,7 +834,8 @@ inserted:
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
- ea_idebug(inode, "creating block %d", block);
+ ea_idebug(inode, "creating block %llu",
+ (unsigned long long)block);
new_bh = sb_getblk(sb, block);
if (!new_bh) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 22764c7c8382..75e7c1f3a080 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -32,20 +32,20 @@ void set_close_on_exec(unsigned int fd, int flag)
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
if (flag)
- FD_SET(fd, fdt->close_on_exec);
+ __set_close_on_exec(fd, fdt);
else
- FD_CLR(fd, fdt->close_on_exec);
+ __clear_close_on_exec(fd, fdt);
spin_unlock(&files->file_lock);
}
-static int get_close_on_exec(unsigned int fd)
+static bool get_close_on_exec(unsigned int fd)
{
struct files_struct *files = current->files;
struct fdtable *fdt;
- int res;
+ bool res;
rcu_read_lock();
fdt = files_fdtable(files);
- res = FD_ISSET(fd, fdt->close_on_exec);
+ res = close_on_exec(fd, fdt);
rcu_read_unlock();
return res;
}
@@ -90,15 +90,15 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
err = -EBUSY;
fdt = files_fdtable(files);
tofree = fdt->fd[newfd];
- if (!tofree && FD_ISSET(newfd, fdt->open_fds))
+ if (!tofree && fd_is_open(newfd, fdt))
goto out_unlock;
get_file(file);
rcu_assign_pointer(fdt->fd[newfd], file);
- FD_SET(newfd, fdt->open_fds);
+ __set_open_fd(newfd, fdt);
if (flags & O_CLOEXEC)
- FD_SET(newfd, fdt->close_on_exec);
+ __set_close_on_exec(newfd, fdt);
else
- FD_CLR(newfd, fdt->close_on_exec);
+ __clear_close_on_exec(newfd, fdt);
spin_unlock(&files->file_lock);
if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 3c426de7203a..ba3f6053025c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
*/
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
-static void *alloc_fdmem(unsigned int size)
+static void *alloc_fdmem(size_t size)
{
/*
* Very large allocations can stress page reclaim, so fall back to
@@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
static struct fdtable * alloc_fdtable(unsigned int nr)
{
struct fdtable *fdt;
- char *data;
+ void *data;
/*
* Figure out how many fds we actually want to support in this fdtable.
@@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
data = alloc_fdmem(nr * sizeof(struct file *));
if (!data)
goto out_fdt;
- fdt->fd = (struct file **)data;
- data = alloc_fdmem(max_t(unsigned int,
+ fdt->fd = data;
+
+ data = alloc_fdmem(max_t(size_t,
2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
if (!data)
goto out_arr;
- fdt->open_fds = (fd_set *)data;
+ fdt->open_fds = data;
data += nr / BITS_PER_BYTE;
- fdt->close_on_exec = (fd_set *)data;
+ fdt->close_on_exec = data;
fdt->next = NULL;
return fdt;
@@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt)
int i;
/* Find the last open fd */
- for (i = size/(8*sizeof(long)); i > 0; ) {
- if (fdt->open_fds->fds_bits[--i])
+ for (i = size / BITS_PER_LONG; i > 0; ) {
+ if (fdt->open_fds[--i])
break;
}
- i = (i+1) * 8 * sizeof(long);
+ i = (i + 1) * BITS_PER_LONG;
return i;
}
@@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
newf->next_fd = 0;
new_fdt = &newf->fdtab;
new_fdt->max_fds = NR_OPEN_DEFAULT;
- new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
- new_fdt->open_fds = (fd_set *)&newf->open_fds_init;
+ new_fdt->close_on_exec = newf->close_on_exec_init;
+ new_fdt->open_fds = newf->open_fds_init;
new_fdt->fd = &newf->fd_array[0];
new_fdt->next = NULL;
@@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
old_fds = old_fdt->fd;
new_fds = new_fdt->fd;
- memcpy(new_fdt->open_fds->fds_bits,
- old_fdt->open_fds->fds_bits, open_files/8);
- memcpy(new_fdt->close_on_exec->fds_bits,
- old_fdt->close_on_exec->fds_bits, open_files/8);
+ memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
+ memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
for (i = open_files; i != 0; i--) {
struct file *f = *old_fds++;
@@ -366,7 +365,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
* is partway through open(). So make sure that this
* fd is available to the new process.
*/
- FD_CLR(open_files - i, new_fdt->open_fds);
+ __clear_open_fd(open_files - i, new_fdt);
}
rcu_assign_pointer(*new_fds++, f);
}
@@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
memset(new_fds, 0, size);
if (new_fdt->max_fds > open_files) {
- int left = (new_fdt->max_fds-open_files)/8;
- int start = open_files / (8 * sizeof(unsigned long));
+ int left = (new_fdt->max_fds - open_files) / 8;
+ int start = open_files / BITS_PER_LONG;
- memset(&new_fdt->open_fds->fds_bits[start], 0, left);
- memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+ memset(&new_fdt->open_fds[start], 0, left);
+ memset(&new_fdt->close_on_exec[start], 0, left);
}
rcu_assign_pointer(newf->fdt, new_fdt);
@@ -419,8 +418,8 @@ struct files_struct init_files = {
.fdtab = {
.max_fds = NR_OPEN_DEFAULT,
.fd = &init_files.fd_array[0],
- .close_on_exec = (fd_set *)&init_files.close_on_exec_init,
- .open_fds = (fd_set *)&init_files.open_fds_init,
+ .close_on_exec = init_files.close_on_exec_init,
+ .open_fds = init_files.open_fds_init,
},
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
};
@@ -443,8 +442,7 @@ repeat:
fd = files->next_fd;
if (fd < fdt->max_fds)
- fd = find_next_zero_bit(fdt->open_fds->fds_bits,
- fdt->max_fds, fd);
+ fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
error = expand_files(files, fd);
if (error < 0)
@@ -460,11 +458,11 @@ repeat:
if (start <= files->next_fd)
files->next_fd = fd + 1;
- FD_SET(fd, fdt->open_fds);
+ __set_open_fd(fd, fdt);
if (flags & O_CLOEXEC)
- FD_SET(fd, fdt->close_on_exec);
+ __set_close_on_exec(fd, fdt);
else
- FD_CLR(fd, fdt->close_on_exec);
+ __clear_close_on_exec(fd, fdt);
error = fd;
#if 1
/* Sanity check */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 236972b752f5..539f36cf3e4a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -256,7 +256,8 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
}
/*
- * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
+ * Move expired (dirtied after work->older_than_this) dirty inodes from
+ * @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
@@ -1148,23 +1149,6 @@ out_unlock_inode:
}
EXPORT_SYMBOL(__mark_inode_dirty);
-/*
- * Write out a superblock's list of dirty inodes. A wait will be performed
- * upon no inodes, all inodes or the final one, depending upon sync_mode.
- *
- * If older_than_this is non-NULL, then only write out inodes which
- * had their first dirtying at a time earlier than *older_than_this.
- *
- * If `bdi' is non-zero then we're being asked to writeback a specific queue.
- * This function assumes that the blockdev superblock's inodes are backed by
- * a variety of queues, so all inodes are searched. For other superblocks,
- * assume that all inodes are backed by the same queue.
- *
- * The inodes to be written are parked on bdi->b_io. They are moved back onto
- * bdi->b_dirty as they are selected for writing. This way, none can be missed
- * on the writer throttling path, and we get decent balancing between many
- * throttled threads: we don't want them all piling up on inode_sync_wait.
- */
static void wait_sb_inodes(struct super_block *sb)
{
struct inode *inode, *old_inode = NULL;
@@ -1364,8 +1348,6 @@ int write_inode_now(struct inode *inode, int sync)
ret = writeback_single_inode(inode, wb, &wbc);
spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
- if (sync)
- inode_sync_wait(inode);
return ret;
}
EXPORT_SYMBOL(write_inode_now);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index d49d202903fb..c78841ee81cf 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh)
* whole transaction.
*
* Requires j_list_lock
- * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __try_to_free_cp_buf(struct journal_head *jh)
{
int ret = 0;
struct buffer_head *bh = jh2bh(jh);
- if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
+ if (jh->b_transaction == NULL && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
/*
* Get our reference so that bh cannot be freed before
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
- jbd_unlock_bh_state(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
- } else {
- jbd_unlock_bh_state(bh);
}
return ret;
}
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
}
/*
- * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
- * The caller must restart a list walk. Wait for someone else to run
- * jbd_unlock_bh_state().
- */
-static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
- __releases(journal->j_list_lock)
-{
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- jbd_lock_bh_state(bh);
- jbd_unlock_bh_state(bh);
- put_bh(bh);
-}
-
-/*
* Clean up transaction's list of buffers submitted for io.
* We wait for any pending IO to complete and remove any clean
* buffers. Note that we take the buffers in the opposite ordering
@@ -222,15 +203,9 @@ restart:
while (!released && transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
- if (!jbd_trylock_bh_state(bh)) {
- jbd_sync_bh(journal, bh);
- spin_lock(&journal->j_list_lock);
- goto restart;
- }
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
@@ -246,7 +221,6 @@ restart:
* it has been written out and so we can drop it from the list
*/
released = __jbd2_journal_remove_checkpoint(jh);
- jbd_unlock_bh_state(bh);
__brelse(bh);
}
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count)
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = journal->j_chkpt_bhs[i];
- clear_buffer_jwrite(bh);
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
}
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count)
* be written out.
*
* Called with j_list_lock held and drops it if 1 is returned
- * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __process_buffer(journal_t *journal, struct journal_head *jh,
int *batch_count, transaction_t *transaction)
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
if (unlikely(journal->j_flags & JBD2_UNMOUNT))
/*
* The journal thread is dead; so starting and
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
get_bh(bh);
- J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
__brelse(bh);
} else {
/*
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
BUFFER_TRACE(bh, "queue");
get_bh(bh);
J_ASSERT_BH(bh, !buffer_jwrite(bh));
- set_buffer_jwrite(bh);
journal->j_chkpt_bhs[*batch_count] = bh;
__buffer_relink_io(jh);
- jbd_unlock_bh_state(bh);
transaction->t_chp_stats.cs_written++;
(*batch_count)++;
if (*batch_count == JBD2_NR_BATCH) {
@@ -407,15 +373,7 @@ restart:
int retry = 0, err;
while (!retry && transaction->t_checkpoint_list) {
- struct buffer_head *bh;
-
jh = transaction->t_checkpoint_list;
- bh = jh2bh(jh);
- if (!jbd_trylock_bh_state(bh)) {
- jbd_sync_bh(journal, bh);
- retry = 1;
- break;
- }
retry = __process_buffer(journal, jh, &batch_count,
transaction);
if (retry < 0 && !result)
@@ -478,79 +436,28 @@ out:
int jbd2_cleanup_journal_tail(journal_t *journal)
{
- transaction_t * transaction;
tid_t first_tid;
- unsigned long blocknr, freed;
+ unsigned long blocknr;
if (is_journal_aborted(journal))
return 1;
- /* OK, work out the oldest transaction remaining in the log, and
- * the log block it starts at.
- *
- * If the log is now empty, we need to work out which is the
- * next transaction ID we will write, and where it will
- * start. */
-
- write_lock(&journal->j_state_lock);
- spin_lock(&journal->j_list_lock);
- transaction = journal->j_checkpoint_transactions;
- if (transaction) {
- first_tid = transaction->t_tid;
- blocknr = transaction->t_log_start;
- } else if ((transaction = journal->j_committing_transaction) != NULL) {
- first_tid = transaction->t_tid;
- blocknr = transaction->t_log_start;
- } else if ((transaction = journal->j_running_transaction) != NULL) {
- first_tid = transaction->t_tid;
- blocknr = journal->j_head;
- } else {
- first_tid = journal->j_transaction_sequence;
- blocknr = journal->j_head;
- }
- spin_unlock(&journal->j_list_lock);
- J_ASSERT(blocknr != 0);
-
- /* If the oldest pinned transaction is at the tail of the log
- already then there's not much we can do right now. */
- if (journal->j_tail_sequence == first_tid) {
- write_unlock(&journal->j_state_lock);
+ if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
- }
-
- /* OK, update the superblock to recover the freed space.
- * Physical blocks come first: have we wrapped beyond the end of
- * the log? */
- freed = blocknr - journal->j_tail;
- if (blocknr < journal->j_tail)
- freed = freed + journal->j_last - journal->j_first;
-
- trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
- jbd_debug(1,
- "Cleaning journal tail from %d to %d (offset %lu), "
- "freeing %lu\n",
- journal->j_tail_sequence, first_tid, blocknr, freed);
-
- journal->j_free += freed;
- journal->j_tail_sequence = first_tid;
- journal->j_tail = blocknr;
- write_unlock(&journal->j_state_lock);
+ J_ASSERT(blocknr != 0);
/*
- * If there is an external journal, we need to make sure that
- * any data blocks that were recently written out --- perhaps
- * by jbd2_log_do_checkpoint() --- are flushed out before we
- * drop the transactions from the external journal. It's
- * unlikely this will be necessary, especially with a
- * appropriately sized journal, but we need this to guarantee
- * correctness. Fortunately jbd2_cleanup_journal_tail()
- * doesn't get called all that often.
+ * We need to make sure that any blocks that were recently written out
+ * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
+ * we drop the transactions from the journal. It's unlikely this will
+ * be necessary, especially with an appropriately sized journal, but we
+ * need this to guarantee correctness. Fortunately
+ * jbd2_cleanup_journal_tail() doesn't get called all that often.
*/
- if ((journal->j_fs_dev != journal->j_dev) &&
- (journal->j_flags & JBD2_BARRIER))
+ if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
- if (!(journal->j_flags & JBD2_ABORT))
- jbd2_journal_update_superblock(journal, 1);
+
+ __jbd2_update_log_tail(journal, first_tid, blocknr);
return 0;
}
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
do {
jh = next_jh;
next_jh = jh->b_cpnext;
- /* Use trylock because of the ranking */
- if (jbd_trylock_bh_state(jh2bh(jh))) {
- ret = __try_to_free_cp_buf(jh);
- if (ret) {
- freed++;
- if (ret == 2) {
- *released = 1;
- return freed;
- }
+ ret = __try_to_free_cp_buf(jh);
+ if (ret) {
+ freed++;
+ if (ret == 2) {
+ *released = 1;
+ return freed;
}
}
/*
@@ -673,9 +577,7 @@ out:
* The function can free jh and bh.
*
* This function is called with j_list_lock held.
- * This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
-
int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
struct transaction_chp_stats_s *stats;
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
transaction->t_tid, stats);
__jbd2_journal_drop_transaction(journal, transaction);
- kfree(transaction);
+ jbd2_journal_free_transaction(transaction);
/* Just in case anybody was waiting for more transactions to be
checkpointed... */
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
+ trace_jbd2_drop_transaction(journal, transaction);
+
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c067a8cae63b..806525a7269c 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -28,7 +28,6 @@
#include <linux/blkdev.h>
#include <linux/bitops.h>
#include <trace/events/jbd2.h>
-#include <asm/system.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -331,6 +330,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0;
struct blk_plug plug;
+ /* Tail of the journal */
+ unsigned long first_block;
+ tid_t first_tid;
+ int update_tail;
/*
* First job: lock down the current transaction and wait for
@@ -340,7 +343,18 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
- jbd2_journal_update_superblock(journal, 1);
+ mutex_lock(&journal->j_checkpoint_mutex);
+ /*
+ * We hold j_checkpoint_mutex so tail cannot change under us.
+ * We don't need any special data guarantees for writing sb
+ * since journal is empty and it is ok for write to be
+ * flushed only with transaction commit.
+ */
+ jbd2_journal_update_sb_log_tail(journal,
+ journal->j_tail_sequence,
+ journal->j_tail,
+ WRITE_SYNC);
+ mutex_unlock(&journal->j_checkpoint_mutex);
} else {
jbd_debug(3, "superblock not updated\n");
}
@@ -677,10 +691,30 @@ start_journal_io:
err = 0;
}
+ /*
+ * Get current oldest transaction in the log before we issue flush
+ * to the filesystem device. After the flush we can be sure that
+ * blocks of all older transactions are checkpointed to persistent
+ * storage and we will be safe to update journal start in the
+ * superblock with the numbers we get here.
+ */
+ update_tail =
+ jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
+
write_lock(&journal->j_state_lock);
+ if (update_tail) {
+ long freed = first_block - journal->j_tail;
+
+ if (first_block < journal->j_tail)
+ freed += journal->j_last - journal->j_first;
+ /* Update tail only if we free significant amount of space */
+ if (freed < journal->j_maxlen / 4)
+ update_tail = 0;
+ }
J_ASSERT(commit_transaction->t_state == T_COMMIT);
commit_transaction->t_state = T_COMMIT_DFLUSH;
write_unlock(&journal->j_state_lock);
+
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
@@ -831,6 +865,14 @@ wait_for_iobuf:
if (err)
jbd2_journal_abort(journal, err);
+ /*
+ * Now disk caches for filesystem device are flushed so we are safe to
+ * erase checkpointed transactions from the log by updating journal
+ * superblock.
+ */
+ if (update_tail)
+ jbd2_update_log_tail(journal, first_tid, first_block);
+
/* End of a transaction! Finally, we can do checkpoint
processing: any buffers committed as a result of this
transaction can be removed from any checkpoint list it was on
@@ -1048,7 +1090,7 @@ restart_loop:
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
if (to_free)
- kfree(commit_transaction);
+ jbd2_journal_free_transaction(commit_transaction);
wake_up(&journal->j_wait_done_commit);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 839377e3d624..1afb701622b0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -50,7 +50,6 @@
#include <asm/uaccess.h>
#include <asm/page.h>
-#include <asm/system.h>
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
@@ -71,7 +70,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke);
EXPORT_SYMBOL(jbd2_journal_init_dev);
EXPORT_SYMBOL(jbd2_journal_init_inode);
-EXPORT_SYMBOL(jbd2_journal_update_format);
EXPORT_SYMBOL(jbd2_journal_check_used_features);
EXPORT_SYMBOL(jbd2_journal_check_available_features);
EXPORT_SYMBOL(jbd2_journal_set_features);
@@ -96,7 +94,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
EXPORT_SYMBOL(jbd2_inode_cache);
-static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
static int jbd2_journal_create_slab(size_t slab_size);
@@ -746,6 +743,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
return jbd2_journal_add_journal_head(bh);
}
+/*
+ * Return tid of the oldest transaction in the journal and block in the journal
+ * where the transaction starts.
+ *
+ * If the journal is now empty, return which will be the next transaction ID
+ * we will write and where will that transaction start.
+ *
+ * The return value is 0 if journal tail cannot be pushed any further, 1 if
+ * it can.
+ */
+int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
+ unsigned long *block)
+{
+ transaction_t *transaction;
+ int ret;
+
+ read_lock(&journal->j_state_lock);
+ spin_lock(&journal->j_list_lock);
+ transaction = journal->j_checkpoint_transactions;
+ if (transaction) {
+ *tid = transaction->t_tid;
+ *block = transaction->t_log_start;
+ } else if ((transaction = journal->j_committing_transaction) != NULL) {
+ *tid = transaction->t_tid;
+ *block = transaction->t_log_start;
+ } else if ((transaction = journal->j_running_transaction) != NULL) {
+ *tid = transaction->t_tid;
+ *block = journal->j_head;
+ } else {
+ *tid = journal->j_transaction_sequence;
+ *block = journal->j_head;
+ }
+ ret = tid_gt(*tid, journal->j_tail_sequence);
+ spin_unlock(&journal->j_list_lock);
+ read_unlock(&journal->j_state_lock);
+
+ return ret;
+}
+
+/*
+ * Update information in journal structure and in on disk journal superblock
+ * about log tail. This function does not check whether information passed in
+ * really pushes log tail further. It's responsibility of the caller to make
+ * sure provided log tail information is valid (e.g. by holding
+ * j_checkpoint_mutex all the time between computing log tail and calling this
+ * function as is the case with jbd2_cleanup_journal_tail()).
+ *
+ * Requires j_checkpoint_mutex
+ */
+void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+{
+ unsigned long freed;
+
+ BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+
+ /*
+ * We cannot afford for write to remain in drive's caches since as
+ * soon as we update j_tail, next transaction can start reusing journal
+ * space and if we lose sb update during power failure we'd replay
+ * old transaction with possibly newly overwritten data.
+ */
+ jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+ write_lock(&journal->j_state_lock);
+ freed = block - journal->j_tail;
+ if (block < journal->j_tail)
+ freed += journal->j_last - journal->j_first;
+
+ trace_jbd2_update_log_tail(journal, tid, block, freed);
+ jbd_debug(1,
+ "Cleaning journal tail from %d to %d (offset %lu), "
+ "freeing %lu\n",
+ journal->j_tail_sequence, tid, block, freed);
+
+ journal->j_free += freed;
+ journal->j_tail_sequence = tid;
+ journal->j_tail = block;
+ write_unlock(&journal->j_state_lock);
+}
+
+/*
+ * This is a variaon of __jbd2_update_log_tail which checks for validity of
+ * provided log tail and locks j_checkpoint_mutex. So it is safe against races
+ * with other threads updating log tail.
+ */
+void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+{
+ mutex_lock(&journal->j_checkpoint_mutex);
+ if (tid_gt(tid, journal->j_tail_sequence))
+ __jbd2_update_log_tail(journal, tid, block);
+ mutex_unlock(&journal->j_checkpoint_mutex);
+}
+
struct jbd2_stats_proc_session {
journal_t *journal;
struct transaction_stats_s *stats;
@@ -1114,40 +1203,45 @@ static int journal_reset(journal_t *journal)
journal->j_max_transaction_buffers = journal->j_maxlen / 4;
- /* Add the dynamic fields and write it to disk. */
- jbd2_journal_update_superblock(journal, 1);
- return jbd2_journal_start_thread(journal);
-}
-
-/**
- * void jbd2_journal_update_superblock() - Update journal sb on disk.
- * @journal: The journal to update.
- * @wait: Set to '0' if you don't want to wait for IO completion.
- *
- * Update a journal's dynamic superblock fields and write it to disk,
- * optionally waiting for the IO to complete.
- */
-void jbd2_journal_update_superblock(journal_t *journal, int wait)
-{
- journal_superblock_t *sb = journal->j_superblock;
- struct buffer_head *bh = journal->j_sb_buffer;
-
/*
* As a special case, if the on-disk copy is already marked as needing
- * no recovery (s_start == 0) and there are no outstanding transactions
- * in the filesystem, then we can safely defer the superblock update
- * until the next commit by setting JBD2_FLUSHED. This avoids
+ * no recovery (s_start == 0), then we can safely defer the superblock
+ * update until the next commit by setting JBD2_FLUSHED. This avoids
* attempting a write to a potential-readonly device.
*/
- if (sb->s_start == 0 && journal->j_tail_sequence ==
- journal->j_transaction_sequence) {
+ if (sb->s_start == 0) {
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
"(start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
- goto out;
+ journal->j_flags |= JBD2_FLUSHED;
+ } else {
+ /* Lock here to make assertions happy... */
+ mutex_lock(&journal->j_checkpoint_mutex);
+ /*
+ * Update log tail information. We use WRITE_FUA since new
+ * transaction will start reusing journal space and so we
+ * must make sure information about current log tail is on
+ * disk before that.
+ */
+ jbd2_journal_update_sb_log_tail(journal,
+ journal->j_tail_sequence,
+ journal->j_tail,
+ WRITE_FUA);
+ mutex_unlock(&journal->j_checkpoint_mutex);
}
+ return jbd2_journal_start_thread(journal);
+}
+static void jbd2_write_superblock(journal_t *journal, int write_op)
+{
+ struct buffer_head *bh = journal->j_sb_buffer;
+ int ret;
+
+ trace_jbd2_write_superblock(journal, write_op);
+ if (!(journal->j_flags & JBD2_BARRIER))
+ write_op &= ~(REQ_FUA | REQ_FLUSH);
+ lock_buffer(bh);
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
@@ -1163,48 +1257,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
clear_buffer_write_io_error(bh);
set_buffer_uptodate(bh);
}
+ get_bh(bh);
+ bh->b_end_io = end_buffer_write_sync;
+ ret = submit_bh(write_op, bh);
+ wait_on_buffer(bh);
+ if (buffer_write_io_error(bh)) {
+ clear_buffer_write_io_error(bh);
+ set_buffer_uptodate(bh);
+ ret = -EIO;
+ }
+ if (ret) {
+ printk(KERN_ERR "JBD2: Error %d detected when updating "
+ "journal superblock for %s.\n", ret,
+ journal->j_devname);
+ }
+}
+
+/**
+ * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
+ * @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
+ *
+ * Update a journal's superblock information about log tail and write it to
+ * disk, waiting for the IO to complete.
+ */
+void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+ unsigned long tail_block, int write_op)
+{
+ journal_superblock_t *sb = journal->j_superblock;
+
+ BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+ jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
+ tail_block, tail_tid);
+
+ sb->s_sequence = cpu_to_be32(tail_tid);
+ sb->s_start = cpu_to_be32(tail_block);
+
+ jbd2_write_superblock(journal, write_op);
+
+ /* Log is no longer empty */
+ write_lock(&journal->j_state_lock);
+ WARN_ON(!sb->s_sequence);
+ journal->j_flags &= ~JBD2_FLUSHED;
+ write_unlock(&journal->j_state_lock);
+}
+
+/**
+ * jbd2_mark_journal_empty() - Mark on disk journal as empty.
+ * @journal: The journal to update.
+ *
+ * Update a journal's dynamic superblock fields to show that journal is empty.
+ * Write updated superblock to disk waiting for IO to complete.
+ */
+static void jbd2_mark_journal_empty(journal_t *journal)
+{
+ journal_superblock_t *sb = journal->j_superblock;
+ BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
read_lock(&journal->j_state_lock);
- jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
- journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+ jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+ journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
- sb->s_start = cpu_to_be32(journal->j_tail);
- sb->s_errno = cpu_to_be32(journal->j_errno);
+ sb->s_start = cpu_to_be32(0);
read_unlock(&journal->j_state_lock);
- BUFFER_TRACE(bh, "marking dirty");
- mark_buffer_dirty(bh);
- if (wait) {
- sync_dirty_buffer(bh);
- if (buffer_write_io_error(bh)) {
- printk(KERN_ERR "JBD2: I/O error detected "
- "when updating journal superblock for %s.\n",
- journal->j_devname);
- clear_buffer_write_io_error(bh);
- set_buffer_uptodate(bh);
- }
- } else
- write_dirty_buffer(bh, WRITE);
-
-out:
- /* If we have just flushed the log (by marking s_start==0), then
- * any future commit will have to be careful to update the
- * superblock again to re-record the true start of the log. */
+ jbd2_write_superblock(journal, WRITE_FUA);
+ /* Log is no longer empty */
write_lock(&journal->j_state_lock);
- if (sb->s_start)
- journal->j_flags &= ~JBD2_FLUSHED;
- else
- journal->j_flags |= JBD2_FLUSHED;
+ journal->j_flags |= JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
}
+
+/**
+ * jbd2_journal_update_sb_errno() - Update error in the journal.
+ * @journal: The journal to update.
+ *
+ * Update a journal's errno. Write updated superblock to disk waiting for IO
+ * to complete.
+ */
+static void jbd2_journal_update_sb_errno(journal_t *journal)
+{
+ journal_superblock_t *sb = journal->j_superblock;
+
+ read_lock(&journal->j_state_lock);
+ jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
+ journal->j_errno);
+ sb->s_errno = cpu_to_be32(journal->j_errno);
+ read_unlock(&journal->j_state_lock);
+
+ jbd2_write_superblock(journal, WRITE_SYNC);
+}
+
/*
* Read the superblock for a given journal, performing initial
* validation of the format.
*/
-
static int journal_get_superblock(journal_t *journal)
{
struct buffer_head *bh;
@@ -1398,14 +1550,11 @@ int jbd2_journal_destroy(journal_t *journal)
if (journal->j_sb_buffer) {
if (!is_journal_aborted(journal)) {
- /* We can now mark the journal as empty. */
- journal->j_tail = 0;
- journal->j_tail_sequence =
- ++journal->j_transaction_sequence;
- jbd2_journal_update_superblock(journal, 1);
- } else {
+ mutex_lock(&journal->j_checkpoint_mutex);
+ jbd2_mark_journal_empty(journal);
+ mutex_unlock(&journal->j_checkpoint_mutex);
+ } else
err = -EIO;
- }
brelse(journal->j_sb_buffer);
}
@@ -1552,61 +1701,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
EXPORT_SYMBOL(jbd2_journal_clear_features);
/**
- * int jbd2_journal_update_format () - Update on-disk journal structure.
- * @journal: Journal to act on.
- *
- * Given an initialised but unloaded journal struct, poke about in the
- * on-disk structure to update it to the most recent supported version.
- */
-int jbd2_journal_update_format (journal_t *journal)
-{
- journal_superblock_t *sb;
- int err;
-
- err = journal_get_superblock(journal);
- if (err)
- return err;
-
- sb = journal->j_superblock;
-
- switch (be32_to_cpu(sb->s_header.h_blocktype)) {
- case JBD2_SUPERBLOCK_V2:
- return 0;
- case JBD2_SUPERBLOCK_V1:
- return journal_convert_superblock_v1(journal, sb);
- default:
- break;
- }
- return -EINVAL;
-}
-
-static int journal_convert_superblock_v1(journal_t *journal,
- journal_superblock_t *sb)
-{
- int offset, blocksize;
- struct buffer_head *bh;
-
- printk(KERN_WARNING
- "JBD2: Converting superblock from version 1 to 2.\n");
-
- /* Pre-initialise new fields to zero */
- offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
- blocksize = be32_to_cpu(sb->s_blocksize);
- memset(&sb->s_feature_compat, 0, blocksize-offset);
-
- sb->s_nr_users = cpu_to_be32(1);
- sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
- journal->j_format_version = 2;
-
- bh = journal->j_sb_buffer;
- BUFFER_TRACE(bh, "marking dirty");
- mark_buffer_dirty(bh);
- sync_dirty_buffer(bh);
- return 0;
-}
-
-
-/**
* int jbd2_journal_flush () - Flush journal
* @journal: Journal to act on.
*
@@ -1619,7 +1713,6 @@ int jbd2_journal_flush(journal_t *journal)
{
int err = 0;
transaction_t *transaction = NULL;
- unsigned long old_tail;
write_lock(&journal->j_state_lock);
@@ -1654,6 +1747,7 @@ int jbd2_journal_flush(journal_t *journal)
if (is_journal_aborted(journal))
return -EIO;
+ mutex_lock(&journal->j_checkpoint_mutex);
jbd2_cleanup_journal_tail(journal);
/* Finally, mark the journal as really needing no recovery.
@@ -1661,14 +1755,9 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current
* s_start value. */
+ jbd2_mark_journal_empty(journal);
+ mutex_unlock(&journal->j_checkpoint_mutex);
write_lock(&journal->j_state_lock);
- old_tail = journal->j_tail;
- journal->j_tail = 0;
- write_unlock(&journal->j_state_lock);
- jbd2_journal_update_superblock(journal, 1);
- write_lock(&journal->j_state_lock);
- journal->j_tail = old_tail;
-
J_ASSERT(!journal->j_running_transaction);
J_ASSERT(!journal->j_committing_transaction);
J_ASSERT(!journal->j_checkpoint_transactions);
@@ -1708,8 +1797,12 @@ int jbd2_journal_wipe(journal_t *journal, int write)
write ? "Clearing" : "Ignoring");
err = jbd2_journal_skip_recovery(journal);
- if (write)
- jbd2_journal_update_superblock(journal, 1);
+ if (write) {
+ /* Lock to make assertions happy... */
+ mutex_lock(&journal->j_checkpoint_mutex);
+ jbd2_mark_journal_empty(journal);
+ mutex_unlock(&journal->j_checkpoint_mutex);
+ }
no_recovery:
return err;
@@ -1759,7 +1852,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
__jbd2_journal_abort_hard(journal);
if (errno)
- jbd2_journal_update_superblock(journal, 1);
+ jbd2_journal_update_sb_errno(journal);
}
/**
@@ -2017,7 +2110,7 @@ static struct kmem_cache *jbd2_journal_head_cache;
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
-static int journal_init_jbd2_journal_head_cache(void)
+static int jbd2_journal_init_journal_head_cache(void)
{
int retval;
@@ -2035,7 +2128,7 @@ static int journal_init_jbd2_journal_head_cache(void)
return retval;
}
-static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
+static void jbd2_journal_destroy_journal_head_cache(void)
{
if (jbd2_journal_head_cache) {
kmem_cache_destroy(jbd2_journal_head_cache);
@@ -2323,7 +2416,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
-static int __init journal_init_handle_cache(void)
+static int __init jbd2_journal_init_handle_cache(void)
{
jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
if (jbd2_handle_cache == NULL) {
@@ -2358,17 +2451,20 @@ static int __init journal_init_caches(void)
ret = jbd2_journal_init_revoke_caches();
if (ret == 0)
- ret = journal_init_jbd2_journal_head_cache();
+ ret = jbd2_journal_init_journal_head_cache();
+ if (ret == 0)
+ ret = jbd2_journal_init_handle_cache();
if (ret == 0)
- ret = journal_init_handle_cache();
+ ret = jbd2_journal_init_transaction_cache();
return ret;
}
static void jbd2_journal_destroy_caches(void)
{
jbd2_journal_destroy_revoke_caches();
- jbd2_journal_destroy_jbd2_journal_head_cache();
+ jbd2_journal_destroy_journal_head_cache();
jbd2_journal_destroy_handle_cache();
+ jbd2_journal_destroy_transaction_cache();
jbd2_journal_destroy_slabs();
}
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index da6d7baf1390..c1a03354a22f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/crc32.h>
+#include <linux/blkdev.h>
#endif
/*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
-
+ /* Make sure all replayed data is on permanent storage */
+ if (journal->j_flags & JBD2_BARRIER)
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
return err;
}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 30b2867d6cc9..6973705d6a3d 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void)
J_ASSERT(!jbd2_revoke_record_cache);
J_ASSERT(!jbd2_revoke_table_cache);
- jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
- sizeof(struct jbd2_revoke_record_s),
- 0,
- SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
- NULL);
+ jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
+ SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
if (!jbd2_revoke_record_cache)
goto record_cache_failure;
- jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
- sizeof(struct jbd2_revoke_table_s),
- 0, SLAB_TEMPORARY, NULL);
+ jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
+ SLAB_TEMPORARY);
if (!jbd2_revoke_table_cache)
goto table_cache_failure;
return 0;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e5aba56e1fd5..ddcd3549c6c2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -33,6 +33,35 @@
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
+static struct kmem_cache *transaction_cache;
+int __init jbd2_journal_init_transaction_cache(void)
+{
+ J_ASSERT(!transaction_cache);
+ transaction_cache = kmem_cache_create("jbd2_transaction_s",
+ sizeof(transaction_t),
+ 0,
+ SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+ NULL);
+ if (transaction_cache)
+ return 0;
+ return -ENOMEM;
+}
+
+void jbd2_journal_destroy_transaction_cache(void)
+{
+ if (transaction_cache) {
+ kmem_cache_destroy(transaction_cache);
+ transaction_cache = NULL;
+ }
+}
+
+void jbd2_journal_free_transaction(transaction_t *transaction)
+{
+ if (unlikely(ZERO_OR_NULL_PTR(transaction)))
+ return;
+ kmem_cache_free(transaction_cache, transaction);
+}
+
/*
* jbd2_get_transaction: obtain a new transaction_t object.
*
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
+ new_transaction = kmem_cache_alloc(transaction_cache,
+ gfp_mask | __GFP_ZERO);
if (!new_transaction) {
/*
* If __GFP_FS is not present, then we may be
@@ -162,7 +192,7 @@ repeat:
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
read_unlock(&journal->j_state_lock);
- kfree(new_transaction);
+ jbd2_journal_free_transaction(new_transaction);
return -EROFS;
}
@@ -284,7 +314,7 @@ repeat:
read_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map);
- kfree(new_transaction);
+ jbd2_journal_free_transaction(new_transaction);
return 0;
}
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
* of these pointers, it could go bad. Generally the caller needs to re-read
* the pointer from the transaction_t.
*
- * Called under j_list_lock. The journal may not be locked.
+ * Called under j_list_lock.
*/
-void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
+static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
{
struct journal_head **list = NULL;
transaction_t *transaction;
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
spin_lock(&journal->j_list_lock);
if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
- if (jh->b_jlist == BJ_None) {
- JBUFFER_TRACE(jh, "remove from checkpoint list");
- __jbd2_journal_remove_checkpoint(jh);
- }
+ JBUFFER_TRACE(jh, "remove from checkpoint list");
+ __jbd2_journal_remove_checkpoint(jh);
}
spin_unlock(&journal->j_list_lock);
out:
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked:
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
+ clear_buffer_delay(bh);
+ clear_buffer_unwritten(bh);
bh->b_bdev = NULL;
return may_free;
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2774e1013b34..f49b9afc4436 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -496,7 +496,7 @@ static int param_set_##name(const char *val, struct kernel_param *kp) \
__typeof__(type) num = which_strtol(val, &endp, 0); \
if (endp == val || *endp || num < (min) || num > (max)) \
return -EINVAL; \
- *((int *) kp->arg) = num; \
+ *((type *) kp->arg) = num; \
return 0; \
}
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 64a326418aa2..3ff5fcc1528f 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -7,7 +7,6 @@
*/
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <linux/time.h>
#include <linux/kernel.h>
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 49df0e7f8379..87484fb8d177 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index e5d71b27a5b0..be20a7e171a0 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -19,7 +19,6 @@
#include <linux/memcontrol.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include "ncp_fs.h"
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4a108a0a2a60..da7b5e4ff9ec 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -43,7 +43,6 @@
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
-#include <asm/system.h>
#include "nfs4_fs.h"
#include "callback.h"
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9c7f66ac6cc2..481be7f7bdd3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -51,7 +51,6 @@
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4fdaaa63cf1c..aa9b709fd328 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -30,7 +30,6 @@
#include <linux/swap.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include "delegation.h"
#include "internal.h"
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 801d6d830787..4ca6f5c8038e 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -32,7 +32,6 @@
#include <linux/namei.h>
#include <linux/security.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "nfs4_fs.h"
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7bb4d13c1cd5..e8bbfa5b3500 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -41,7 +41,6 @@
#include <linux/freezer.h>
#include <linux/crc32.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "nfs4_fs.h"
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 634c0bcb4fd6..5acfd9ea8a31 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -793,7 +793,6 @@ filelayout_clear_request_commit(struct nfs_page *req)
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
if (list_is_singular(&req->wb_list)) {
- struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layout_segment *lseg;
/* From here we can find the bucket, but for the moment,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e809d2305ebf..f82bde005a82 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -270,7 +270,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case 0:
return 0;
case -NFS4ERR_OPENMODE:
- if (nfs_have_delegation(inode, FMODE_READ)) {
+ if (inode && nfs_have_delegation(inode, FMODE_READ)) {
nfs_inode_return_delegation(inode);
exception->retry = 1;
return 0;
@@ -282,10 +282,9 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- if (state != NULL)
- nfs_remove_bad_delegation(state->inode);
if (state == NULL)
break;
+ nfs_remove_bad_delegation(state->inode);
nfs4_schedule_stateid_recovery(server, state);
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
@@ -2290,11 +2289,12 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- break;
+ goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
}
} while (exception.retry);
+out:
return err;
}
@@ -3712,7 +3712,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
if (acl_len > buflen)
goto out_free;
_copy_from_pages(buf, pages, res.acl_data_offset,
- res.acl_len);
+ acl_len);
}
ret = acl_len;
out_free:
@@ -3824,8 +3824,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- if (state != NULL)
- nfs_remove_bad_delegation(state->inode);
+ if (state == NULL)
+ break;
+ nfs_remove_bad_delegation(state->inode);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
@@ -6111,21 +6112,22 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
return;
switch (task->tk_status) { /* Just ignore these failures */
- case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
- case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
- case NFS4ERR_BADLAYOUT: /* no layout */
- case NFS4ERR_GRACE: /* loca_recalim always false */
+ case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */
+ case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
+ case -NFS4ERR_BADLAYOUT: /* no layout */
+ case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
- }
-
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
- rpc_restart_call_prepare(task);
- return;
- }
-
- if (task->tk_status == 0)
+ break;
+ case 0:
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
+ break;
+ default:
+ if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ rpc_restart_call_prepare(task);
+ return;
+ }
+ }
}
static void nfs4_layoutcommit_release(void *calldata)
@@ -6229,11 +6231,12 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
case 0:
case -NFS4ERR_WRONGSEC:
case -NFS4ERR_NOTSUPP:
- break;
+ goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
}
} while (exception.retry);
+out:
return err;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cc1f758a7ee1..9a0e8ef4a409 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -20,7 +20,6 @@
#include <linux/nfs_page.h>
#include <linux/module.h>
-#include <asm/system.h>
#include "pnfs.h"
#include "nfs4_fs.h"
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ccc4cdb1efe9..37412f706b32 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -55,7 +55,6 @@
#include <linux/nsproxy.h>
#include <linux/rcupdate.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "nfs4_fs.h"
diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
new file mode 100644
index 000000000000..4123551208d8
--- /dev/null
+++ b/fs/nfsd/current_stateid.h
@@ -0,0 +1,28 @@
+#ifndef _NFSD4_CURRENT_STATE_H
+#define _NFSD4_CURRENT_STATE_H
+
+#include "state.h"
+#include "xdr4.h"
+
+extern void clear_current_stateid(struct nfsd4_compound_state *cstate);
+/*
+ * functions to set current state id
+ */
+extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
+extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *);
+extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *);
+extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
+
+/*
+ * functions to consume current state id
+ */
+extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
+extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *);
+extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *);
+extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *);
+extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
+extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *);
+extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *);
+extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *);
+
+#endif /* _NFSD4_CURRENT_STATE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cf8a6bd062fa..8e9689abbc0c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
struct svc_expkey key;
struct svc_expkey *ek = NULL;
- if (mlen < 1 || mesg[mlen-1] != '\n')
+ if (mesg[mlen - 1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
new file mode 100644
index 000000000000..12e0cff435b4
--- /dev/null
+++ b/fs/nfsd/netns.h
@@ -0,0 +1,34 @@
+/*
+ * per net namespace data structures for nfsd
+ *
+ * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __NFSD_NETNS_H__
+#define __NFSD_NETNS_H__
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+struct cld_net;
+
+struct nfsd_net {
+ struct cld_net *cld_net;
+};
+
+extern int nfsd_net_id;
+#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0e262f32ac41..c8e9f637153a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -645,7 +645,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.timeout = &timeparms,
.program = &cb_program,
.version = 0,
- .authflavor = clp->cl_flavor,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
};
struct rpc_clnt *client;
@@ -656,6 +655,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.client_name = clp->cl_principal;
args.prognumber = conn->cb_prog,
args.protocol = XPRT_TRANSPORT_TCP;
+ args.authflavor = clp->cl_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
if (!conn->cb_xprt)
@@ -665,6 +665,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
args.protocol = XPRT_TRANSPORT_BC_TCP;
+ args.authflavor = RPC_AUTH_UNIX;
}
/* Create RPC client */
client = rpc_create(&args);
@@ -754,9 +755,9 @@ static void do_probe_callback(struct nfs4_client *clp)
*/
void nfsd4_probe_callback(struct nfs4_client *clp)
{
- /* XXX: atomicity? Also, should we be using cl_cb_flags? */
+ /* XXX: atomicity? Also, should we be using cl_flags? */
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
- set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
+ set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
do_probe_callback(clp);
}
@@ -915,7 +916,7 @@ void nfsd4_destroy_callback_queue(void)
/* must be called under the state lock */
void nfsd4_shutdown_callback(struct nfs4_client *clp)
{
- set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
+ set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
/*
* Note this won't actually result in a null callback;
* instead, nfsd4_do_callback_rpc() will detect the killed
@@ -966,15 +967,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
clp->cl_cb_conn.cb_xprt = NULL;
}
- if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
+ if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return;
spin_lock(&clp->cl_lock);
/*
* Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them:
*/
- BUG_ON(!clp->cl_cb_flags);
- clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
+ BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
+ clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp);
if (c) {
@@ -986,7 +987,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
- warn_no_callback_path(clp, err);
+ nfsd4_mark_cb_down(clp, err);
return;
}
/* Yay, the callback channel's back! Restart any callbacks: */
@@ -1000,7 +1001,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w)
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
- if (clp->cl_cb_flags)
+ if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 94096273cd6c..322d11ce06a4 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -41,6 +41,14 @@
#include "nfsd.h"
/*
+ * Turn off idmapping when using AUTH_SYS.
+ */
+static bool nfs4_disable_idmapping = true;
+module_param(nfs4_disable_idmapping, bool, 0644);
+MODULE_PARM_DESC(nfs4_disable_idmapping,
+ "Turn off server's NFSv4 idmapping when using 'sec=sys'");
+
+/*
* Cache entry
*/
@@ -561,28 +569,65 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
return ret;
}
+static bool
+numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id)
+{
+ int ret;
+ char buf[11];
+
+ if (namelen + 1 > sizeof(buf))
+ /* too long to represent a 32-bit id: */
+ return false;
+ /* Just to make sure it's null-terminated: */
+ memcpy(buf, name, namelen);
+ buf[namelen] = '\0';
+ ret = kstrtouint(name, 10, id);
+ return ret == 0;
+}
+
+static __be32
+do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id)
+{
+ if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
+ if (numeric_name_to_id(rqstp, type, name, namelen, id))
+ return 0;
+ /*
+ * otherwise, fall through and try idmapping, for
+ * backwards compatibility with clients sending names:
+ */
+ return idmap_name_to_id(rqstp, type, name, namelen, id);
+}
+
+static int
+do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
+{
+ if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
+ return sprintf(name, "%u", id);
+ return idmap_id_to_name(rqstp, type, id, name);
+}
+
__be32
nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
__u32 *id)
{
- return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
+ return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
}
__be32
nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
__u32 *id)
{
- return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
+ return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
}
int
nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
{
- return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
+ return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
}
int
nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
{
- return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
+ return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 896da74ec563..2ed14dfd00a2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -39,6 +39,7 @@
#include "cache.h"
#include "xdr4.h"
#include "vfs.h"
+#include "current_stateid.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -192,10 +193,13 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
- struct svc_fh resfh;
+ struct svc_fh *resfh;
__be32 status;
- fh_init(&resfh, NFS4_FHSIZE);
+ resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
+ if (!resfh)
+ return nfserr_jukebox;
+ fh_init(resfh, NFS4_FHSIZE);
open->op_truncate = 0;
if (open->op_create) {
@@ -220,7 +224,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
*/
status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
open->op_fname.len, &open->op_iattr,
- &resfh, open->op_createmode,
+ resfh, open->op_createmode,
(u32 *)open->op_verf.data,
&open->op_truncate, &open->op_created);
@@ -234,30 +238,29 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
FATTR4_WORD1_TIME_MODIFY);
} else {
status = nfsd_lookup(rqstp, current_fh,
- open->op_fname.data, open->op_fname.len, &resfh);
+ open->op_fname.data, open->op_fname.len, resfh);
fh_unlock(current_fh);
if (status)
goto out;
- status = nfsd_check_obj_isreg(&resfh);
+ status = nfsd_check_obj_isreg(resfh);
}
if (status)
goto out;
if (is_create_with_attrs(open) && open->op_acl != NULL)
- do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
-
- set_change_info(&open->op_cinfo, current_fh);
- fh_dup2(current_fh, &resfh);
+ do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval);
/* set reply cache */
fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
- &resfh.fh_handle);
+ &resfh->fh_handle);
if (!open->op_created)
- status = do_open_permission(rqstp, current_fh, open,
+ status = do_open_permission(rqstp, resfh, open,
NFSD_MAY_NOP);
-
+ set_change_info(&open->op_cinfo, current_fh);
+ fh_dup2(current_fh, resfh);
out:
- fh_put(&resfh);
+ fh_put(resfh);
+ kfree(resfh);
return status;
}
@@ -310,16 +313,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
return nfserr_inval;
- /* We don't yet support WANT bits: */
- open->op_share_access &= NFS4_SHARE_ACCESS_MASK;
-
open->op_created = 0;
/*
* RFC5661 18.51.3
* Before RECLAIM_COMPLETE done, server should deny new lock
*/
if (nfsd4_has_session(cstate) &&
- !cstate->session->se_client->cl_firststate &&
+ !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
+ &cstate->session->se_client->cl_flags) &&
open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
return nfserr_grace;
@@ -452,6 +453,10 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_restorefh;
fh_dup2(&cstate->current_fh, &cstate->save_fh);
+ if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) {
+ memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t));
+ SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ }
return nfs_ok;
}
@@ -463,6 +468,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_nofilehandle;
fh_dup2(&cstate->save_fh, &cstate->current_fh);
+ if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
+ memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
+ SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG);
+ }
return nfs_ok;
}
@@ -481,14 +490,20 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&access->ac_supported);
}
+static void gen_boot_verifier(nfs4_verifier *verifier)
+{
+ __be32 verf[2];
+
+ verf[0] = (__be32)nfssvc_boot.tv_sec;
+ verf[1] = (__be32)nfssvc_boot.tv_usec;
+ memcpy(verifier->data, verf, sizeof(verifier->data));
+}
+
static __be32
nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_commit *commit)
{
- u32 *p = (u32 *)commit->co_verf.data;
- *p++ = nfssvc_boot.tv_sec;
- *p++ = nfssvc_boot.tv_usec;
-
+ gen_boot_verifier(&commit->co_verf);
return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
commit->co_count);
}
@@ -865,7 +880,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
stateid_t *stateid = &write->wr_stateid;
struct file *filp = NULL;
- u32 *p;
__be32 status = nfs_ok;
unsigned long cnt;
@@ -887,9 +901,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cnt = write->wr_buflen;
write->wr_how_written = write->wr_stable_how;
- p = (u32 *)write->wr_verifier.data;
- *p++ = nfssvc_boot.tv_sec;
- *p++ = nfssvc_boot.tv_usec;
+ gen_boot_verifier(&write->wr_verifier);
status = nfsd_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, write->wr_vlen,
@@ -1000,6 +1012,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
void *);
typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
+typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
@@ -1025,6 +1039,10 @@ enum nfsd4_op_flags {
* the v4.0 case).
*/
OP_CACHEME = 1 << 6,
+ /*
+ * These are ops which clear current state id.
+ */
+ OP_CLEAR_STATEID = 1 << 7,
};
struct nfsd4_operation {
@@ -1033,11 +1051,15 @@ struct nfsd4_operation {
char *op_name;
/* Try to get response size before operation */
nfsd4op_rsize op_rsize_bop;
+ stateid_setter op_get_currentstateid;
+ stateid_getter op_set_currentstateid;
};
static struct nfsd4_operation nfsd4_ops[];
+#ifdef NFSD_DEBUG
static const char *nfsd4_op_name(unsigned opnum);
+#endif
/*
* Enforce NFSv4.1 COMPOUND ordering rules:
@@ -1215,13 +1237,23 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
if (op->status)
goto encode_op;
- if (opdesc->op_func)
+ if (opdesc->op_func) {
+ if (opdesc->op_get_currentstateid)
+ opdesc->op_get_currentstateid(cstate, &op->u);
op->status = opdesc->op_func(rqstp, cstate, &op->u);
- else
+ } else
BUG_ON(op->status == nfs_ok);
- if (!op->status && need_wrongsec_check(rqstp))
- op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
+ if (!op->status) {
+ if (opdesc->op_set_currentstateid)
+ opdesc->op_set_currentstateid(cstate, &op->u);
+
+ if (opdesc->op_flags & OP_CLEAR_STATEID)
+ clear_current_stateid(cstate);
+
+ if (need_wrongsec_check(rqstp))
+ op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
+ }
encode_op:
/* Only from SEQUENCE */
@@ -1413,6 +1445,8 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_CLOSE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
+ .op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid,
},
[OP_COMMIT] = {
.op_func = (nfsd4op_func)nfsd4_commit,
@@ -1422,7 +1456,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_CREATE] = {
.op_func = (nfsd4op_func)nfsd4_create,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
.op_name = "OP_CREATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
},
@@ -1431,6 +1465,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_DELEGRETURN",
.op_rsize_bop = nfsd4_only_status_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid,
},
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
@@ -1453,6 +1488,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_LOCK",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+ .op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid,
},
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
@@ -1463,15 +1499,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_LOCKU",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid,
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
- .op_flags = OP_HANDLES_WRONGSEC,
+ .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUP",
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
- .op_flags = OP_HANDLES_WRONGSEC,
+ .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUPP",
},
[OP_NVERIFY] = {
@@ -1483,6 +1520,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
.op_name = "OP_OPEN",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
+ .op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid,
},
[OP_OPEN_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_open_confirm,
@@ -1495,25 +1533,30 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_OPEN_DOWNGRADE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
+ .op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid,
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
+ | OP_CLEAR_STATEID,
.op_name = "OP_PUTFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_PUTPUBFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
+ | OP_CLEAR_STATEID,
.op_name = "OP_PUTPUBFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
+ | OP_CLEAR_STATEID,
.op_name = "OP_PUTROOTFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
@@ -1522,6 +1565,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READ",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
@@ -1576,6 +1620,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_SETATTR",
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid,
},
[OP_SETCLIENTID] = {
.op_func = (nfsd4op_func)nfsd4_setclientid,
@@ -1600,6 +1645,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_name = "OP_WRITE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid,
},
[OP_RELEASE_LOCKOWNER] = {
.op_func = (nfsd4op_func)nfsd4_release_lockowner,
@@ -1674,12 +1720,14 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
};
+#ifdef NFSD_DEBUG
static const char *nfsd4_op_name(unsigned opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
return "unknown_operation";
}
+#endif
#define nfsd4_voidres nfsd4_voidargs
struct nfsd4_voidargs { int dummy; };
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 0b3e875d1abd..4767429264a2 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 The Regents of the University of Michigan.
+* Copyright (c) 2012 Jeff Layton <jlayton@redhat.com>
* All rights reserved.
*
* Andy Adamson <andros@citi.umich.edu>
@@ -36,16 +37,34 @@
#include <linux/namei.h>
#include <linux/crypto.h>
#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <net/net_namespace.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/cld.h>
#include "nfsd.h"
#include "state.h"
#include "vfs.h"
+#include "netns.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
+/* Declarations */
+struct nfsd4_client_tracking_ops {
+ int (*init)(struct net *);
+ void (*exit)(struct net *);
+ void (*create)(struct nfs4_client *);
+ void (*remove)(struct nfs4_client *);
+ int (*check)(struct nfs4_client *);
+ void (*grace_done)(struct net *, time_t);
+};
+
/* Globals */
static struct file *rec_file;
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static struct nfsd4_client_tracking_ops *client_tracking_ops;
static int
nfs4_save_creds(const struct cred **original_creds)
@@ -117,7 +136,8 @@ out_no_tfm:
return status;
}
-void nfsd4_create_clid_dir(struct nfs4_client *clp)
+static void
+nfsd4_create_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
char *dname = clp->cl_recdir;
@@ -126,9 +146,8 @@ void nfsd4_create_clid_dir(struct nfs4_client *clp)
dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
- if (clp->cl_firststate)
+ if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- clp->cl_firststate = 1;
if (!rec_file)
return;
status = nfs4_save_creds(&original_cred);
@@ -265,19 +284,19 @@ out_unlock:
return status;
}
-void
+static void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
int status;
- if (!rec_file || !clp->cl_firststate)
+ if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
status = mnt_want_write_file(rec_file);
if (status)
goto out;
- clp->cl_firststate = 0;
+ clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
status = nfs4_save_creds(&original_cred);
if (status < 0)
@@ -292,7 +311,6 @@ out:
if (status)
printk("NFSD: Failed to remove expired client state directory"
" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
- return;
}
static int
@@ -311,8 +329,9 @@ purge_old(struct dentry *parent, struct dentry *child)
return 0;
}
-void
-nfsd4_recdir_purge_old(void) {
+static void
+nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
+{
int status;
if (!rec_file)
@@ -343,7 +362,7 @@ load_recdir(struct dentry *parent, struct dentry *child)
return 0;
}
-int
+static int
nfsd4_recdir_load(void) {
int status;
@@ -361,8 +380,8 @@ nfsd4_recdir_load(void) {
* Hold reference to the recovery directory.
*/
-void
-nfsd4_init_recdir()
+static int
+nfsd4_init_recdir(void)
{
const struct cred *original_cred;
int status;
@@ -377,20 +396,44 @@ nfsd4_init_recdir()
printk("NFSD: Unable to change credentials to find recovery"
" directory: error %d\n",
status);
- return;
+ return status;
}
rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
if (IS_ERR(rec_file)) {
printk("NFSD: unable to find recovery directory %s\n",
user_recovery_dirname);
+ status = PTR_ERR(rec_file);
rec_file = NULL;
}
nfs4_reset_creds(original_cred);
+ return status;
}
-void
+static int
+nfsd4_load_reboot_recovery_data(struct net *net)
+{
+ int status;
+
+ /* XXX: The legacy code won't work in a container */
+ if (net != &init_net) {
+ WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client "
+ "tracking in a container!\n");
+ return -EINVAL;
+ }
+
+ nfs4_lock_state();
+ status = nfsd4_init_recdir();
+ if (!status)
+ status = nfsd4_recdir_load();
+ nfs4_unlock_state();
+ if (status)
+ printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
+ return status;
+}
+
+static void
nfsd4_shutdown_recdir(void)
{
if (!rec_file)
@@ -399,6 +442,13 @@ nfsd4_shutdown_recdir(void)
rec_file = NULL;
}
+static void
+nfsd4_legacy_tracking_exit(struct net *net)
+{
+ nfs4_release_reclaim();
+ nfsd4_shutdown_recdir();
+}
+
/*
* Change the NFSv4 recovery directory to recdir.
*/
@@ -425,3 +475,572 @@ nfs4_recoverydir(void)
{
return user_recovery_dirname;
}
+
+static int
+nfsd4_check_legacy_client(struct nfs4_client *clp)
+{
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ if (nfsd4_find_reclaim_client(clp)) {
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
+ .init = nfsd4_load_reboot_recovery_data,
+ .exit = nfsd4_legacy_tracking_exit,
+ .create = nfsd4_create_clid_dir,
+ .remove = nfsd4_remove_clid_dir,
+ .check = nfsd4_check_legacy_client,
+ .grace_done = nfsd4_recdir_purge_old,
+};
+
+/* Globals */
+#define NFSD_PIPE_DIR "nfsd"
+#define NFSD_CLD_PIPE "cld"
+
+/* per-net-ns structure for holding cld upcall info */
+struct cld_net {
+ struct rpc_pipe *cn_pipe;
+ spinlock_t cn_lock;
+ struct list_head cn_list;
+ unsigned int cn_xid;
+};
+
+struct cld_upcall {
+ struct list_head cu_list;
+ struct cld_net *cu_net;
+ struct task_struct *cu_task;
+ struct cld_msg cu_msg;
+};
+
+static int
+__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+{
+ int ret;
+ struct rpc_pipe_msg msg;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.data = cmsg;
+ msg.len = sizeof(*cmsg);
+
+ /*
+ * Set task state before we queue the upcall. That prevents
+ * wake_up_process in the downcall from racing with schedule.
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ ret = rpc_queue_upcall(pipe, &msg);
+ if (ret < 0) {
+ set_current_state(TASK_RUNNING);
+ goto out;
+ }
+
+ schedule();
+ set_current_state(TASK_RUNNING);
+
+ if (msg.errno < 0)
+ ret = msg.errno;
+out:
+ return ret;
+}
+
+static int
+cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+{
+ int ret;
+
+ /*
+ * -EAGAIN occurs when pipe is closed and reopened while there are
+ * upcalls queued.
+ */
+ do {
+ ret = __cld_pipe_upcall(pipe, cmsg);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static ssize_t
+cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+ struct cld_upcall *tmp, *cup;
+ struct cld_msg *cmsg = (struct cld_msg *)src;
+ uint32_t xid;
+ struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
+ nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ if (mlen != sizeof(*cmsg)) {
+ dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen,
+ sizeof(*cmsg));
+ return -EINVAL;
+ }
+
+ /* copy just the xid so we can try to find that */
+ if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+ dprintk("%s: error when copying xid from userspace", __func__);
+ return -EFAULT;
+ }
+
+ /* walk the list and find corresponding xid */
+ cup = NULL;
+ spin_lock(&cn->cn_lock);
+ list_for_each_entry(tmp, &cn->cn_list, cu_list) {
+ if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+ cup = tmp;
+ list_del_init(&cup->cu_list);
+ break;
+ }
+ }
+ spin_unlock(&cn->cn_lock);
+
+ /* couldn't find upcall? */
+ if (!cup) {
+ dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid);
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+ return -EFAULT;
+
+ wake_up_process(cup->cu_task);
+ return mlen;
+}
+
+static void
+cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+ struct cld_msg *cmsg = msg->data;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
+ cu_msg);
+
+ /* errno >= 0 means we got a downcall */
+ if (msg->errno >= 0)
+ return;
+
+ wake_up_process(cup->cu_task);
+}
+
+static const struct rpc_pipe_ops cld_upcall_ops = {
+ .upcall = rpc_pipe_generic_upcall,
+ .downcall = cld_pipe_downcall,
+ .destroy_msg = cld_pipe_destroy_msg,
+};
+
+static struct dentry *
+nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
+{
+ struct dentry *dir, *dentry;
+
+ dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
+ if (dir == NULL)
+ return ERR_PTR(-ENOENT);
+ dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
+ dput(dir);
+ return dentry;
+}
+
+static void
+nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
+{
+ if (pipe->dentry)
+ rpc_unlink(pipe->dentry);
+}
+
+static struct dentry *
+nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
+{
+ struct super_block *sb;
+ struct dentry *dentry;
+
+ sb = rpc_get_sb_net(net);
+ if (!sb)
+ return NULL;
+ dentry = nfsd4_cld_register_sb(sb, pipe);
+ rpc_put_sb_net(net);
+ return dentry;
+}
+
+static void
+nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
+{
+ struct super_block *sb;
+
+ sb = rpc_get_sb_net(net);
+ if (sb) {
+ nfsd4_cld_unregister_sb(pipe);
+ rpc_put_sb_net(net);
+ }
+}
+
+/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
+static int
+nfsd4_init_cld_pipe(struct net *net)
+{
+ int ret;
+ struct dentry *dentry;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn;
+
+ if (nn->cld_net)
+ return 0;
+
+ cn = kzalloc(sizeof(*cn), GFP_KERNEL);
+ if (!cn) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(cn->cn_pipe)) {
+ ret = PTR_ERR(cn->cn_pipe);
+ goto err;
+ }
+ spin_lock_init(&cn->cn_lock);
+ INIT_LIST_HEAD(&cn->cn_list);
+
+ dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto err_destroy_data;
+ }
+
+ cn->cn_pipe->dentry = dentry;
+ nn->cld_net = cn;
+ return 0;
+
+err_destroy_data:
+ rpc_destroy_pipe_data(cn->cn_pipe);
+err:
+ kfree(cn);
+ printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
+ ret);
+ return ret;
+}
+
+static void
+nfsd4_remove_cld_pipe(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ nfsd4_cld_unregister_net(net, cn->cn_pipe);
+ rpc_destroy_pipe_data(cn->cn_pipe);
+ kfree(nn->cld_net);
+ nn->cld_net = NULL;
+}
+
+static struct cld_upcall *
+alloc_cld_upcall(struct cld_net *cn)
+{
+ struct cld_upcall *new, *tmp;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return new;
+
+ /* FIXME: hard cap on number in flight? */
+restart_search:
+ spin_lock(&cn->cn_lock);
+ list_for_each_entry(tmp, &cn->cn_list, cu_list) {
+ if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+ cn->cn_xid++;
+ spin_unlock(&cn->cn_lock);
+ goto restart_search;
+ }
+ }
+ new->cu_task = current;
+ new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
+ put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+ new->cu_net = cn;
+ list_add(&new->cu_list, &cn->cn_list);
+ spin_unlock(&cn->cn_lock);
+
+ dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+
+ return new;
+}
+
+static void
+free_cld_upcall(struct cld_upcall *victim)
+{
+ struct cld_net *cn = victim->cu_net;
+
+ spin_lock(&cn->cn_lock);
+ list_del(&victim->cu_list);
+ spin_unlock(&cn->cn_lock);
+ kfree(victim);
+}
+
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_create(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ /* FIXME: determine net from clp */
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ /* Don't upcall if it's already stored */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(cn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_msg.cm_cmd = Cld_Create;
+ cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ if (!ret) {
+ ret = cup->cu_msg.cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to create client "
+ "record on stable storage: %d\n", ret);
+}
+
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_remove(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ /* FIXME: determine net from clp */
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ /* Don't upcall if it's already removed */
+ if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(cn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_msg.cm_cmd = Cld_Remove;
+ cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ if (!ret) {
+ ret = cup->cu_msg.cm_status;
+ clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to remove client "
+ "record from stable storage: %d\n", ret);
+}
+
+/* Check for presence of a record, and update its timestamp */
+static int
+nfsd4_cld_check(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ /* FIXME: determine net from clp */
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ /* Don't upcall if one was already stored during this grace pd */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ cup = alloc_cld_upcall(cn);
+ if (!cup) {
+ printk(KERN_ERR "NFSD: Unable to check client record on "
+ "stable storage: %d\n", -ENOMEM);
+ return -ENOMEM;
+ }
+
+ cup->cu_msg.cm_cmd = Cld_Check;
+ cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ if (!ret) {
+ ret = cup->cu_msg.cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+ return ret;
+}
+
+static void
+nfsd4_cld_grace_done(struct net *net, time_t boot_time)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(cn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_msg.cm_cmd = Cld_GraceDone;
+ cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ if (!ret)
+ ret = cup->cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+ .init = nfsd4_init_cld_pipe,
+ .exit = nfsd4_remove_cld_pipe,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check,
+ .grace_done = nfsd4_cld_grace_done,
+};
+
+int
+nfsd4_client_tracking_init(struct net *net)
+{
+ int status;
+ struct path path;
+
+ if (!client_tracking_ops) {
+ client_tracking_ops = &nfsd4_cld_tracking_ops;
+ status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
+ if (!status) {
+ if (S_ISDIR(path.dentry->d_inode->i_mode))
+ client_tracking_ops =
+ &nfsd4_legacy_tracking_ops;
+ path_put(&path);
+ }
+ }
+
+ status = client_tracking_ops->init(net);
+ if (status) {
+ printk(KERN_WARNING "NFSD: Unable to initialize client "
+ "recovery tracking! (%d)\n", status);
+ client_tracking_ops = NULL;
+ }
+ return status;
+}
+
+void
+nfsd4_client_tracking_exit(struct net *net)
+{
+ if (client_tracking_ops) {
+ client_tracking_ops->exit(net);
+ client_tracking_ops = NULL;
+ }
+}
+
+void
+nfsd4_client_record_create(struct nfs4_client *clp)
+{
+ if (client_tracking_ops)
+ client_tracking_ops->create(clp);
+}
+
+void
+nfsd4_client_record_remove(struct nfs4_client *clp)
+{
+ if (client_tracking_ops)
+ client_tracking_ops->remove(clp);
+}
+
+int
+nfsd4_client_record_check(struct nfs4_client *clp)
+{
+ if (client_tracking_ops)
+ return client_tracking_ops->check(clp);
+
+ return -EOPNOTSUPP;
+}
+
+void
+nfsd4_record_grace_done(struct net *net, time_t boot_time)
+{
+ if (client_tracking_ops)
+ client_tracking_ops->grace_done(net, boot_time);
+}
+
+static int
+rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct super_block *sb = ptr;
+ struct net *net = sb->s_fs_info;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ struct dentry *dentry;
+ int ret = 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
+ if (!cn) {
+ module_put(THIS_MODULE);
+ return 0;
+ }
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ break;
+ }
+ cn->cn_pipe->dentry = dentry;
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ if (cn->cn_pipe->dentry)
+ nfsd4_cld_unregister_sb(cn->cn_pipe);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+struct notifier_block nfsd4_cld_block = {
+ .notifier_call = rpc_pipefs_event,
+};
+
+int
+register_cld_notifier(void)
+{
+ return rpc_pipefs_notifier_register(&nfsd4_cld_block);
+}
+
+void
+unregister_cld_notifier(void)
+{
+ rpc_pipefs_notifier_unregister(&nfsd4_cld_block);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c5cddd659429..1841f8bf845e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -58,11 +58,15 @@ static const stateid_t one_stateid = {
static const stateid_t zero_stateid = {
/* all fields zero */
};
+static const stateid_t currentstateid = {
+ .si_generation = 1,
+};
static u64 current_sessionid = 1;
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
+#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
/* forward declarations */
static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -91,6 +95,19 @@ nfs4_lock_state(void)
mutex_lock(&client_mutex);
}
+static void free_session(struct kref *);
+
+/* Must be called under the client_lock */
+static void nfsd4_put_session_locked(struct nfsd4_session *ses)
+{
+ kref_put(&ses->se_ref, free_session);
+}
+
+static void nfsd4_get_session(struct nfsd4_session *ses)
+{
+ kref_get(&ses->se_ref);
+}
+
void
nfs4_unlock_state(void)
{
@@ -605,12 +622,20 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
return sid->sequence % SESSION_HASH_SIZE;
}
+#ifdef NFSD_DEBUG
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
u32 *ptr = (u32 *)(&sessionid->data[0]);
dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
}
+#else
+static inline void
+dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
+{
+}
+#endif
+
static void
gen_sessionid(struct nfsd4_session *ses)
@@ -832,11 +857,12 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
spin_unlock(&clp->cl_lock);
}
-void free_session(struct kref *kref)
+static void free_session(struct kref *kref)
{
struct nfsd4_session *ses;
int mem;
+ BUG_ON(!spin_is_locked(&client_lock));
ses = container_of(kref, struct nfsd4_session, se_ref);
nfsd4_del_conns(ses);
spin_lock(&nfsd_drc_lock);
@@ -847,6 +873,13 @@ void free_session(struct kref *kref)
kfree(ses);
}
+void nfsd4_put_session(struct nfsd4_session *ses)
+{
+ spin_lock(&client_lock);
+ nfsd4_put_session_locked(ses);
+ spin_unlock(&client_lock);
+}
+
static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
{
struct nfsd4_session *new;
@@ -894,7 +927,9 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n
status = nfsd4_new_conn_from_crses(rqstp, new);
/* whoops: benny points out, status is ignored! (err, or bogus) */
if (status) {
+ spin_lock(&client_lock);
free_session(&new->se_ref);
+ spin_unlock(&client_lock);
return NULL;
}
if (cses->flags & SESSION4_BACK_CHAN) {
@@ -1006,12 +1041,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
static inline void
free_client(struct nfs4_client *clp)
{
+ BUG_ON(!spin_is_locked(&client_lock));
while (!list_empty(&clp->cl_sessions)) {
struct nfsd4_session *ses;
ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
se_perclnt);
list_del(&ses->se_perclnt);
- nfsd4_put_session(ses);
+ nfsd4_put_session_locked(ses);
}
if (clp->cl_cred.cr_group_info)
put_group_info(clp->cl_cred.cr_group_info);
@@ -1138,12 +1174,12 @@ static void gen_clid(struct nfs4_client *clp)
static void gen_confirm(struct nfs4_client *clp)
{
+ __be32 verf[2];
static u32 i;
- u32 *p;
- p = (u32 *)clp->cl_confirm.data;
- *p++ = get_seconds();
- *p++ = i++;
+ verf[0] = (__be32)get_seconds();
+ verf[1] = (__be32)i++;
+ memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}
static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
@@ -1180,7 +1216,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
if (princ) {
clp->cl_principal = kstrdup(princ, GFP_KERNEL);
if (clp->cl_principal == NULL) {
+ spin_lock(&client_lock);
free_client(clp);
+ spin_unlock(&client_lock);
return NULL;
}
}
@@ -1347,6 +1385,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
slot->sl_opcnt = resp->opcnt;
slot->sl_status = resp->cstate.status;
+ slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
if (nfsd4_not_cached(resp)) {
slot->sl_datalen = 0;
return;
@@ -1374,15 +1413,12 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
struct nfsd4_op *op;
struct nfsd4_slot *slot = resp->cstate.slot;
- dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
- resp->opcnt, resp->cstate.slot->sl_cachethis);
-
/* Encode the replayed sequence operation */
op = &args->ops[resp->opcnt - 1];
nfsd4_encode_operation(resp, op);
/* Return nfserr_retry_uncached_rep in next operation. */
- if (args->opcnt > 1 && slot->sl_cachethis == 0) {
+ if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
op = &args->ops[resp->opcnt++];
op->status = nfserr_retry_uncached_rep;
nfsd4_encode_operation(resp, op);
@@ -1575,16 +1611,11 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
else
return nfserr_seq_misordered;
}
- /* Normal */
+ /* Note unsigned 32-bit arithmetic handles wraparound: */
if (likely(seqid == slot_seqid + 1))
return nfs_ok;
- /* Replay */
if (seqid == slot_seqid)
return nfserr_replay_cache;
- /* Wraparound */
- if (seqid == 1 && (slot_seqid + 1) == 0)
- return nfs_ok;
- /* Misordered replay or misordered new request */
return nfserr_seq_misordered;
}
@@ -1815,9 +1846,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
nfsd4_probe_callback_sync(ses->se_client);
nfs4_unlock_state();
+ spin_lock(&client_lock);
nfsd4_del_conns(ses);
-
- nfsd4_put_session(ses);
+ nfsd4_put_session_locked(ses);
+ spin_unlock(&client_lock);
status = nfs_ok;
out:
dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1921,8 +1953,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
* sr_highest_slotid and the sr_target_slot id to maxslots */
seq->maxslots = session->se_fchannel.maxreqs;
- status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
+ status = check_slot_seqid(seq->seqid, slot->sl_seqid,
+ slot->sl_flags & NFSD4_SLOT_INUSE);
if (status == nfserr_replay_cache) {
+ status = nfserr_seq_misordered;
+ if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
+ goto out;
cstate->slot = slot;
cstate->session = session;
/* Return the cached reply status and set cstate->status
@@ -1938,9 +1974,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
conn = NULL;
/* Success! bump slot seqid */
- slot->sl_inuse = true;
slot->sl_seqid = seq->seqid;
- slot->sl_cachethis = seq->cachethis;
+ slot->sl_flags |= NFSD4_SLOT_INUSE;
+ if (seq->cachethis)
+ slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
+ else
+ slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;
cstate->slot = slot;
cstate->session = session;
@@ -2030,7 +2069,8 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
nfs4_lock_state();
status = nfserr_complete_already;
- if (cstate->session->se_client->cl_firststate)
+ if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
+ &cstate->session->se_client->cl_flags))
goto out;
status = nfserr_stale_clientid;
@@ -2045,7 +2085,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
goto out;
status = nfs_ok;
- nfsd4_create_clid_dir(cstate->session->se_client);
+ nfsd4_client_record_create(cstate->session->se_client);
out:
nfs4_unlock_state();
return status;
@@ -2240,7 +2280,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
conf = find_confirmed_client_by_str(unconf->cl_recdir,
hash);
if (conf) {
- nfsd4_remove_clid_dir(conf);
+ nfsd4_client_record_remove(conf);
expire_client(conf);
}
move_to_confirmed(unconf);
@@ -2633,8 +2673,6 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
static int share_access_to_flags(u32 share_access)
{
- share_access &= ~NFS4_SHARE_WANT_MASK;
-
return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
}
@@ -2776,10 +2814,9 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
static void
-nfs4_set_claim_prev(struct nfsd4_open *open)
+nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
{
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
- open->op_openowner->oo_owner.so_client->cl_firststate = 1;
}
/* Should we give out recallable state?: */
@@ -2855,6 +2892,27 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
return 0;
}
+static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
+{
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ if (status == -EAGAIN)
+ open->op_why_no_deleg = WND4_CONTENTION;
+ else {
+ open->op_why_no_deleg = WND4_RESOURCE;
+ switch (open->op_deleg_want) {
+ case NFS4_SHARE_WANT_READ_DELEG:
+ case NFS4_SHARE_WANT_WRITE_DELEG:
+ case NFS4_SHARE_WANT_ANY_DELEG:
+ break;
+ case NFS4_SHARE_WANT_CANCEL:
+ open->op_why_no_deleg = WND4_CANCELLED;
+ break;
+ case NFS4_SHARE_WANT_NO_DELEG:
+ BUG(); /* not supposed to get here */
+ }
+ }
+}
+
/*
* Attempt to hand out a delegation.
*/
@@ -2864,7 +2922,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_
struct nfs4_delegation *dp;
struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
int cb_up;
- int status, flag = 0;
+ int status = 0, flag = 0;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2905,11 +2963,16 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_
dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
STATEID_VAL(&dp->dl_stid.sc_stateid));
out:
- if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
- && flag == NFS4_OPEN_DELEGATE_NONE
- && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
- dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_delegate_type = flag;
+ if (flag == NFS4_OPEN_DELEGATE_NONE) {
+ if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
+ open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+ dprintk("NFSD: WARNING: refusing delegation reclaim\n");
+
+ /* 4.1 client asking for a delegation? */
+ if (open->op_deleg_want)
+ nfsd4_open_deleg_none_ext(open, status);
+ }
return;
out_free:
nfs4_put_delegation(dp);
@@ -2918,6 +2981,24 @@ out_no_deleg:
goto out;
}
+static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
+ struct nfs4_delegation *dp)
+{
+ if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
+ dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+ } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
+ dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ }
+ /* Otherwise the client must be confused wanting a delegation
+ * it already has, therefore we don't return
+ * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+ */
+}
+
/*
* called with nfs4_lock_state() held.
*/
@@ -2979,24 +3060,36 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
- if (nfsd4_has_session(&resp->cstate))
+ if (nfsd4_has_session(&resp->cstate)) {
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_WANTED;
+ goto nodeleg;
+ }
+ }
+
/*
* Attempt to hand out a delegation. No error return, because the
* OPEN succeeds even if we fail.
*/
nfs4_open_delegation(current_fh, open, stp);
-
+nodeleg:
status = nfs_ok;
dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
STATEID_VAL(&stp->st_stid.sc_stateid));
out:
+ /* 4.1 client trying to upgrade/downgrade delegation? */
+ if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+ open->op_deleg_want)
+ nfsd4_deleg_xgrade_none_ext(open, dp);
+
if (fp)
put_nfs4_file(fp);
if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
- nfs4_set_claim_prev(open);
+ nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
/*
* To finish the open response, we just need to set the rflags.
*/
@@ -3066,7 +3159,7 @@ static void
nfsd4_end_grace(void)
{
dprintk("NFSD: end of grace period\n");
- nfsd4_recdir_purge_old();
+ nfsd4_record_grace_done(&init_net, boot_time);
locks_end_grace(&nfsd4_manager);
/*
* Now that every NFSv4 client has had the chance to recover and
@@ -3115,7 +3208,7 @@ nfs4_laundromat(void)
clp = list_entry(pos, struct nfs4_client, cl_lru);
dprintk("NFSD: purging unused client (clientid %08x)\n",
clp->cl_clientid.cl_id);
- nfsd4_remove_clid_dir(clp);
+ nfsd4_client_record_remove(clp);
expire_client(clp);
}
spin_lock(&recall_lock);
@@ -3400,7 +3493,14 @@ __be32
nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_test_stateid *test_stateid)
{
- /* real work is done during encoding */
+ struct nfsd4_test_stateid_id *stateid;
+ struct nfs4_client *cl = cstate->session->se_client;
+
+ nfs4_lock_state();
+ list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
+ stateid->ts_id_status = nfs4_validate_stateid(cl, &stateid->ts_id_stateid);
+ nfs4_unlock_state();
+
return nfs_ok;
}
@@ -3539,7 +3639,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
- nfsd4_create_clid_dir(oo->oo_owner.so_client);
+ nfsd4_client_record_create(oo->oo_owner.so_client);
status = nfs_ok;
out:
if (!cstate->replay_owner)
@@ -3596,7 +3696,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
cstate->current_fh.fh_dentry->d_name.name);
/* We don't yet support WANT bits: */
- od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
+ if (od->od_deleg_want)
+ dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
+ od->od_deleg_want);
nfs4_lock_state();
status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
@@ -4353,7 +4455,9 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
struct nfs4_client *clp;
clp = find_confirmed_client_by_str(name, strhashval);
- return clp ? 1 : 0;
+ if (!clp)
+ return 0;
+ return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
/*
@@ -4377,7 +4481,7 @@ nfs4_client_to_reclaim(const char *name)
return 1;
}
-static void
+void
nfs4_release_reclaim(void)
{
struct nfs4_client_reclaim *crp = NULL;
@@ -4397,19 +4501,12 @@ nfs4_release_reclaim(void)
/*
* called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-static struct nfs4_client_reclaim *
-nfs4_find_reclaim_client(clientid_t *clid)
+struct nfs4_client_reclaim *
+nfsd4_find_reclaim_client(struct nfs4_client *clp)
{
unsigned int strhashval;
- struct nfs4_client *clp;
struct nfs4_client_reclaim *crp = NULL;
-
- /* find clientid in conf_id_hashtbl */
- clp = find_confirmed_client(clid);
- if (clp == NULL)
- return NULL;
-
dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
clp->cl_name.len, clp->cl_name.data,
clp->cl_recdir);
@@ -4430,7 +4527,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
__be32
nfs4_check_open_reclaim(clientid_t *clid)
{
- return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
+ struct nfs4_client *clp;
+
+ /* find clientid in conf_id_hashtbl */
+ clp = find_confirmed_client(clid);
+ if (clp == NULL)
+ return nfserr_reclaim_bad;
+
+ return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok;
}
#ifdef CONFIG_NFSD_FAULT_INJECTION
@@ -4442,7 +4546,7 @@ void nfsd_forget_clients(u64 num)
nfs4_lock_state();
list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
- nfsd4_remove_clid_dir(clp);
+ nfsd4_client_record_remove(clp);
expire_client(clp);
if (++count == num)
break;
@@ -4577,19 +4681,6 @@ nfs4_state_init(void)
reclaim_str_hashtbl_size = 0;
}
-static void
-nfsd4_load_reboot_recovery_data(void)
-{
- int status;
-
- nfs4_lock_state();
- nfsd4_init_recdir();
- status = nfsd4_recdir_load();
- nfs4_unlock_state();
- if (status)
- printk("NFSD: Failure reading reboot recovery data\n");
-}
-
/*
* Since the lifetime of a delegation isn't limited to that of an open, a
* client may quite reasonably hang on to a delegation as long as it has
@@ -4613,21 +4704,34 @@ set_max_delegations(void)
/* initialization to perform when the nfsd service is started: */
-static int
-__nfs4_state_start(void)
+int
+nfs4_state_start(void)
{
int ret;
+ /*
+ * FIXME: For now, we hang most of the pernet global stuff off of
+ * init_net until nfsd is fully containerized. Eventually, we'll
+ * need to pass a net pointer into this function, take a reference
+ * to that instead and then do most of the rest of this on a per-net
+ * basis.
+ */
+ get_net(&init_net);
+ nfsd4_client_tracking_init(&init_net);
boot_time = get_seconds();
locks_start_grace(&nfsd4_manager);
printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
nfsd4_grace);
ret = set_callback_cred();
- if (ret)
- return -ENOMEM;
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_recovery;
+ }
laundry_wq = create_singlethread_workqueue("nfsd4");
- if (laundry_wq == NULL)
- return -ENOMEM;
+ if (laundry_wq == NULL) {
+ ret = -ENOMEM;
+ goto out_recovery;
+ }
ret = nfsd4_create_callback_queue();
if (ret)
goto out_free_laundry;
@@ -4636,16 +4740,12 @@ __nfs4_state_start(void)
return 0;
out_free_laundry:
destroy_workqueue(laundry_wq);
+out_recovery:
+ nfsd4_client_tracking_exit(&init_net);
+ put_net(&init_net);
return ret;
}
-int
-nfs4_state_start(void)
-{
- nfsd4_load_reboot_recovery_data();
- return __nfs4_state_start();
-}
-
static void
__nfs4_state_shutdown(void)
{
@@ -4676,7 +4776,8 @@ __nfs4_state_shutdown(void)
unhash_delegation(dp);
}
- nfsd4_shutdown_recdir();
+ nfsd4_client_tracking_exit(&init_net);
+ put_net(&init_net);
}
void
@@ -4686,8 +4787,108 @@ nfs4_state_shutdown(void)
destroy_workqueue(laundry_wq);
locks_end_grace(&nfsd4_manager);
nfs4_lock_state();
- nfs4_release_reclaim();
__nfs4_state_shutdown();
nfs4_unlock_state();
nfsd4_destroy_callback_queue();
}
+
+static void
+get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
+{
+ if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+ memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
+}
+
+static void
+put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
+{
+ if (cstate->minorversion) {
+ memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
+ SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ }
+}
+
+void
+clear_current_stateid(struct nfsd4_compound_state *cstate)
+{
+ CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+}
+
+/*
+ * functions to set current state id
+ */
+void
+nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+{
+ put_stateid(cstate, &odp->od_stateid);
+}
+
+void
+nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+{
+ put_stateid(cstate, &open->op_stateid);
+}
+
+void
+nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+{
+ put_stateid(cstate, &close->cl_stateid);
+}
+
+void
+nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
+{
+ put_stateid(cstate, &lock->lk_resp_stateid);
+}
+
+/*
+ * functions to consume current state id
+ */
+
+void
+nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+{
+ get_stateid(cstate, &odp->od_stateid);
+}
+
+void
+nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
+{
+ get_stateid(cstate, &drp->dr_stateid);
+}
+
+void
+nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
+{
+ get_stateid(cstate, &fsp->fr_stateid);
+}
+
+void
+nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
+{
+ get_stateid(cstate, &setattr->sa_stateid);
+}
+
+void
+nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+{
+ get_stateid(cstate, &close->cl_stateid);
+}
+
+void
+nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
+{
+ get_stateid(cstate, &locku->lu_stateid);
+}
+
+void
+nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
+{
+ get_stateid(cstate, &read->rd_stateid);
+}
+
+void
+nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
+{
+ get_stateid(cstate, &write->wr_stateid);
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0ec5a1b9700e..bcd8904ab1e3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -133,22 +133,6 @@ xdr_error: \
} \
} while (0)
-static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep)
-{
- savep->p = argp->p;
- savep->end = argp->end;
- savep->pagelen = argp->pagelen;
- savep->pagelist = argp->pagelist;
-}
-
-static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep)
-{
- argp->p = savep->p;
- argp->end = savep->end;
- argp->pagelen = savep->pagelen;
- argp->pagelist = savep->pagelist;
-}
-
static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
{
/* We want more bytes than seem to be available.
@@ -638,14 +622,18 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup
DECODE_TAIL;
}
-static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
+static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when)
{
__be32 *p;
u32 w;
READ_BUF(4);
READ32(w);
- *x = w;
+ *share_access = w & NFS4_SHARE_ACCESS_MASK;
+ *deleg_want = w & NFS4_SHARE_WANT_MASK;
+ if (deleg_when)
+ *deleg_when = w & NFS4_SHARE_WHEN_MASK;
+
switch (w & NFS4_SHARE_ACCESS_MASK) {
case NFS4_SHARE_ACCESS_READ:
case NFS4_SHARE_ACCESS_WRITE:
@@ -673,6 +661,9 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
w &= ~NFS4_SHARE_WANT_MASK;
if (!w)
return nfs_ok;
+
+ if (!deleg_when) /* open_downgrade */
+ return nfserr_inval;
switch (w) {
case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL:
case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED:
@@ -719,6 +710,7 @@ static __be32
nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
{
DECODE_HEAD;
+ u32 dummy;
memset(open->op_bmval, 0, sizeof(open->op_bmval));
open->op_iattr.ia_valid = 0;
@@ -727,7 +719,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
/* seqid, share_access, share_deny, clientid, ownerlen */
READ_BUF(4);
READ32(open->op_seqid);
- status = nfsd4_decode_share_access(argp, &open->op_share_access);
+ /* decode, yet ignore deleg_when until supported */
+ status = nfsd4_decode_share_access(argp, &open->op_share_access,
+ &open->op_deleg_want, &dummy);
if (status)
goto xdr_error;
status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
@@ -755,14 +749,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
goto out;
break;
case NFS4_CREATE_EXCLUSIVE:
- READ_BUF(8);
- COPYMEM(open->op_verf.data, 8);
+ READ_BUF(NFS4_VERIFIER_SIZE);
+ COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
break;
case NFS4_CREATE_EXCLUSIVE4_1:
if (argp->minorversion < 1)
goto xdr_error;
- READ_BUF(8);
- COPYMEM(open->op_verf.data, 8);
+ READ_BUF(NFS4_VERIFIER_SIZE);
+ COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl);
if (status)
@@ -848,7 +842,8 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
return status;
READ_BUF(4);
READ32(open_down->od_seqid);
- status = nfsd4_decode_share_access(argp, &open_down->od_share_access);
+ status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
+ &open_down->od_deleg_want, NULL);
if (status)
return status;
status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
@@ -994,8 +989,8 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
{
DECODE_HEAD;
- READ_BUF(8);
- COPYMEM(setclientid->se_verf.data, 8);
+ READ_BUF(NFS4_VERIFIER_SIZE);
+ COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE);
status = nfsd4_decode_opaque(argp, &setclientid->se_name);
if (status)
@@ -1020,9 +1015,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
{
DECODE_HEAD;
- READ_BUF(8 + sizeof(nfs4_verifier));
+ READ_BUF(8 + NFS4_VERIFIER_SIZE);
COPYMEM(&scd_c->sc_clientid, 8);
- COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier));
+ COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE);
DECODE_TAIL;
}
@@ -1385,26 +1380,29 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
static __be32
nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
{
- unsigned int nbytes;
- stateid_t si;
int i;
- __be32 *p;
- __be32 status;
+ __be32 *p, status;
+ struct nfsd4_test_stateid_id *stateid;
READ_BUF(4);
test_stateid->ts_num_ids = ntohl(*p++);
- nbytes = test_stateid->ts_num_ids * sizeof(stateid_t);
- if (nbytes > (u32)((char *)argp->end - (char *)argp->p))
- goto xdr_error;
-
- test_stateid->ts_saved_args = argp;
- save_buf(argp, &test_stateid->ts_savedp);
+ INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
for (i = 0; i < test_stateid->ts_num_ids; i++) {
- status = nfsd4_decode_stateid(argp, &si);
+ stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL);
+ if (!stateid) {
+ status = PTR_ERR(stateid);
+ goto out;
+ }
+
+ defer_free(argp, kfree, stateid);
+ INIT_LIST_HEAD(&stateid->ts_id_list);
+ list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
+
+ status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid);
if (status)
- return status;
+ goto out;
}
status = 0;
@@ -2661,8 +2659,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(8);
- WRITEMEM(commit->co_verf.data, 8);
+ RESERVE_SPACE(NFS4_VERIFIER_SIZE);
+ WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE);
ADJUST_ARGS();
}
return nfserr;
@@ -2851,6 +2849,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
WRITE32(0); /* XXX: is NULL principal ok? */
ADJUST_ARGS();
break;
+ case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */
+ switch (open->op_why_no_deleg) {
+ case WND4_CONTENTION:
+ case WND4_RESOURCE:
+ RESERVE_SPACE(8);
+ WRITE32(open->op_why_no_deleg);
+ WRITE32(0); /* deleg signaling not supported yet */
+ break;
+ default:
+ RESERVE_SPACE(4);
+ WRITE32(open->op_why_no_deleg);
+ }
+ ADJUST_ARGS();
+ break;
default:
BUG();
}
@@ -3008,7 +3020,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
if (resp->xbuf->page_len)
return nfserr_resource;
- RESERVE_SPACE(8); /* verifier */
+ RESERVE_SPACE(NFS4_VERIFIER_SIZE);
savep = p;
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
@@ -3209,9 +3221,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(8 + sizeof(nfs4_verifier));
+ RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE);
WRITEMEM(&scd->se_clientid, 8);
- WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier));
+ WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE);
ADJUST_ARGS();
}
else if (nfserr == nfserr_clid_inuse) {
@@ -3232,7 +3244,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
RESERVE_SPACE(16);
WRITE32(write->wr_bytes_written);
WRITE32(write->wr_how_written);
- WRITEMEM(write->wr_verifier.data, 8);
+ WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE);
ADJUST_ARGS();
}
return nfserr;
@@ -3391,30 +3403,17 @@ __be32
nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr,
struct nfsd4_test_stateid *test_stateid)
{
- struct nfsd4_compoundargs *argp;
- struct nfs4_client *cl = resp->cstate.session->se_client;
- stateid_t si;
+ struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
- int i;
- int valid;
-
- restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp);
- argp = test_stateid->ts_saved_args;
- RESERVE_SPACE(4);
+ RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
*p++ = htonl(test_stateid->ts_num_ids);
- resp->p = p;
- nfs4_lock_state();
- for (i = 0; i < test_stateid->ts_num_ids; i++) {
- nfsd4_decode_stateid(argp, &si);
- valid = nfs4_validate_stateid(cl, &si);
- RESERVE_SPACE(4);
- *p++ = htonl(valid);
- resp->p = p;
+ list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {
+ *p++ = htonl(stateid->ts_id_status);
}
- nfs4_unlock_state();
+ ADJUST_ARGS();
return nfserr;
}
@@ -3532,7 +3531,7 @@ int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
if (length > session->se_fchannel.maxresp_sz)
return nfserr_rep_too_big;
- if (slot->sl_cachethis == 1 &&
+ if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) &&
length > session->se_fchannel.maxresp_cached)
return nfserr_rep_too_big_to_cache;
@@ -3656,8 +3655,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
if (nfsd4_has_session(cs)) {
if (cs->status != nfserr_replay_cache) {
nfsd4_store_cache_entry(resp);
- dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
- cs->slot->sl_inuse = false;
+ cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
}
/* Renew the clientid on success and on replay */
release_session_client(cs->session);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 64c24af8d7ea..2c53be6d3579 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -13,12 +13,14 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
#include "idmap.h"
#include "nfsd.h"
#include "cache.h"
#include "fault_inject.h"
+#include "netns.h"
/*
* We have a single directory with several nodes in it.
@@ -1124,14 +1126,26 @@ static int create_proc_exports_entry(void)
}
#endif
+int nfsd_net_id;
+static struct pernet_operations nfsd_net_ops = {
+ .id = &nfsd_net_id,
+ .size = sizeof(struct nfsd_net),
+};
+
static int __init init_nfsd(void)
{
int retval;
printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
- retval = nfsd4_init_slabs();
+ retval = register_cld_notifier();
if (retval)
return retval;
+ retval = register_pernet_subsys(&nfsd_net_ops);
+ if (retval < 0)
+ goto out_unregister_notifier;
+ retval = nfsd4_init_slabs();
+ if (retval)
+ goto out_unregister_pernet;
nfs4_state_init();
retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
if (retval)
@@ -1169,6 +1183,10 @@ out_free_stat:
nfsd_fault_inject_cleanup();
out_free_slabs:
nfsd4_free_slabs();
+out_unregister_pernet:
+ unregister_pernet_subsys(&nfsd_net_ops);
+out_unregister_notifier:
+ unregister_cld_notifier();
return retval;
}
@@ -1184,6 +1202,8 @@ static void __exit exit_nfsd(void)
nfsd4_free_slabs();
nfsd_fault_inject_cleanup();
unregister_filesystem(&nfsd_fs_type);
+ unregister_pernet_subsys(&nfsd_net_ops);
+ unregister_cld_notifier();
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 1d1e8589b4ce..1671429ffa66 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -364,12 +364,17 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
NFSD_WRITEABLE_ATTRS_WORD2
extern int nfsd4_is_junction(struct dentry *dentry);
-#else
+extern int register_cld_notifier(void);
+extern void unregister_cld_notifier(void);
+#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
{
return 0;
}
+#define register_cld_notifier() 0
+#define unregister_cld_notifier() do { } while(0)
+
#endif /* CONFIG_NFSD_V4 */
#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fce472f5f39e..28dfad39f0c5 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -307,33 +307,37 @@ static void set_max_drc(void)
dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
}
-int nfsd_create_serv(void)
+static int nfsd_get_default_max_blksize(void)
{
- int err = 0;
+ struct sysinfo i;
+ unsigned long long target;
+ unsigned long ret;
+
+ si_meminfo(&i);
+ target = (i.totalram - i.totalhigh) << PAGE_SHIFT;
+ /*
+ * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig
+ * machines, but only uses 32K on 128M machines. Bottom out at
+ * 8K on 32M and smaller. Of course, this is only a default.
+ */
+ target >>= 12;
+
+ ret = NFSSVC_MAXBLKSIZE;
+ while (ret > target && ret >= 8*1024*2)
+ ret /= 2;
+ return ret;
+}
+int nfsd_create_serv(void)
+{
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
svc_get(nfsd_serv);
return 0;
}
- if (nfsd_max_blksize == 0) {
- /* choose a suitable default */
- struct sysinfo i;
- si_meminfo(&i);
- /* Aim for 1/4096 of memory per thread
- * This gives 1MB on 4Gig machines
- * But only uses 32K on 128M machines.
- * Bottom out at 8K on 32M and smaller.
- * Of course, this is only a default.
- */
- nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
- i.totalram <<= PAGE_SHIFT - 12;
- while (nfsd_max_blksize > i.totalram &&
- nfsd_max_blksize >= 8*1024*2)
- nfsd_max_blksize /= 2;
- }
+ if (nfsd_max_blksize == 0)
+ nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions();
-
nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
nfsd_last_thread, nfsd, THIS_MODULE);
if (nfsd_serv == NULL)
@@ -341,7 +345,7 @@ int nfsd_create_serv(void)
set_max_drc();
do_gettimeofday(&nfssvc_boot); /* record boot time */
- return err;
+ return 0;
}
int nfsd_nrpools(void)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ffb5df1db94f..89ab137d379a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -128,12 +128,14 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
struct nfsd4_slot {
- bool sl_inuse;
- bool sl_cachethis;
- u16 sl_opcnt;
u32 sl_seqid;
__be32 sl_status;
u32 sl_datalen;
+ u16 sl_opcnt;
+#define NFSD4_SLOT_INUSE (1 << 0)
+#define NFSD4_SLOT_CACHETHIS (1 << 1)
+#define NFSD4_SLOT_INITIALIZED (1 << 2)
+ u8 sl_flags;
char sl_data[];
};
@@ -196,18 +198,7 @@ struct nfsd4_session {
struct nfsd4_slot *se_slots[]; /* forward channel slots */
};
-static inline void
-nfsd4_put_session(struct nfsd4_session *ses)
-{
- extern void free_session(struct kref *kref);
- kref_put(&ses->se_ref, free_session);
-}
-
-static inline void
-nfsd4_get_session(struct nfsd4_session *ses)
-{
- kref_get(&ses->se_ref);
-}
+extern void nfsd4_put_session(struct nfsd4_session *ses);
/* formatted contents of nfs4_sessionid */
struct nfsd4_sessionid {
@@ -245,14 +236,17 @@ struct nfs4_client {
struct svc_cred cl_cred; /* setclientid principal */
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
- u32 cl_firststate; /* recovery dir creation */
u32 cl_minorversion;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
-#define NFSD4_CLIENT_CB_UPDATE 1
-#define NFSD4_CLIENT_KILL 2
- unsigned long cl_cb_flags;
+#define NFSD4_CLIENT_CB_UPDATE (0)
+#define NFSD4_CLIENT_CB_KILL (1)
+#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */
+#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */
+#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
+ 1 << NFSD4_CLIENT_CB_KILL)
+ unsigned long cl_flags;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
#define NFSD4_CB_UP 0
@@ -463,6 +457,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
extern void nfs4_lock_state(void);
extern void nfs4_unlock_state(void);
extern int nfs4_in_grace(void);
+extern void nfs4_release_reclaim(void);
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
extern void nfs4_free_openowner(struct nfs4_openowner *);
extern void nfs4_free_lockowner(struct nfs4_lockowner *);
@@ -477,16 +473,17 @@ extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfs4_put_delegation(struct nfs4_delegation *dp);
extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
-extern void nfsd4_init_recdir(void);
-extern int nfsd4_recdir_load(void);
-extern void nfsd4_shutdown_recdir(void);
extern int nfs4_client_to_reclaim(const char *name);
extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
-extern void nfsd4_recdir_purge_old(void);
-extern void nfsd4_create_clid_dir(struct nfs4_client *clp);
-extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
extern void release_session_client(struct nfsd4_session *);
extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
+/* nfs4recover operations */
+extern int nfsd4_client_tracking_init(struct net *net);
+extern void nfsd4_client_tracking_exit(struct net *net);
+extern void nfsd4_client_record_create(struct nfs4_client *clp);
+extern void nfsd4_client_record_remove(struct nfs4_client *clp);
+extern int nfsd4_client_record_check(struct nfs4_client *clp);
+extern void nfsd4_record_grace_done(struct net *net, time_t boot_time);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e59f71d0cf73..296d671654d6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -737,12 +737,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
/*
* Open an existing file or directory.
- * The access argument indicates the type of open (read/write/lock)
+ * The may_flags argument indicates the type of open (read/write/lock)
+ * and additional flags.
* N.B. After this call fhp needs an fh_put
*/
__be32
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int access, struct file **filp)
+ int may_flags, struct file **filp)
{
struct dentry *dentry;
struct inode *inode;
@@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
* in case a chmod has now revoked permission.
*/
- err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
+ err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
@@ -768,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* or any access when mandatory locking enabled
*/
err = nfserr_perm;
- if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
+ if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
goto out;
/*
* We must ignore files (but only files) which might have mandatory
@@ -781,12 +782,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
if (!inode->i_fop)
goto out;
- host_err = nfsd_open_break_lease(inode, access);
+ host_err = nfsd_open_break_lease(inode, may_flags);
if (host_err) /* NOMEM or WOULDBLOCK */
goto out_nfserr;
- if (access & NFSD_MAY_WRITE) {
- if (access & NFSD_MAY_READ)
+ if (may_flags & NFSD_MAY_WRITE) {
+ if (may_flags & NFSD_MAY_READ)
flags = O_RDWR|O_LARGEFILE;
else
flags = O_WRONLY|O_LARGEFILE;
@@ -795,8 +796,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
flags, current_cred());
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
- else
- host_err = ima_file_check(*filp, access);
+ else {
+ host_err = ima_file_check(*filp, may_flags);
+
+ if (may_flags & NFSD_MAY_64BIT_COOKIE)
+ (*filp)->f_mode |= FMODE_64BITHASH;
+ else
+ (*filp)->f_mode |= FMODE_32BITHASH;
+ }
+
out_nfserr:
err = nfserrno(host_err);
out:
@@ -2021,8 +2029,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
__be32 err;
struct file *file;
loff_t offset = *offsetp;
+ int may_flags = NFSD_MAY_READ;
+
+ /* NFSv2 only supports 32 bit cookies */
+ if (rqstp->rq_vers > 2)
+ may_flags |= NFSD_MAY_64BIT_COOKIE;
- err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
+ err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
if (err)
goto out;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 1dcd238e11a0..ec0611b2b738 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -27,6 +27,8 @@
#define NFSD_MAY_BYPASS_GSS 0x400
#define NFSD_MAY_READ_IF_EXEC 0x800
+#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */
+
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2364747ee97d..1b3501598ab5 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -43,6 +43,13 @@
#define NFSD4_MAX_TAGLEN 128
#define XDR_LEN(n) (((n) + 3) & ~3)
+#define CURRENT_STATE_ID_FLAG (1<<0)
+#define SAVED_STATE_ID_FLAG (1<<1)
+
+#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f))
+#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f))
+#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f))
+
struct nfsd4_compound_state {
struct svc_fh current_fh;
struct svc_fh save_fh;
@@ -54,6 +61,10 @@ struct nfsd4_compound_state {
size_t iovlen;
u32 minorversion;
u32 status;
+ stateid_t current_stateid;
+ stateid_t save_stateid;
+ /* to indicate current and saved state id presents */
+ u32 sid_flags;
};
static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
@@ -212,16 +223,19 @@ struct nfsd4_open {
struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
u32 op_delegate_type; /* request - CLAIM_PREV only */
stateid_t op_delegate_stateid; /* request - response */
+ u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */
u32 op_create; /* request */
u32 op_createmode; /* request */
u32 op_bmval[3]; /* request */
struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
- nfs4_verifier verf; /* EXCLUSIVE4 */
+ nfs4_verifier op_verf __attribute__((aligned(32)));
+ /* EXCLUSIVE4 */
clientid_t op_clientid; /* request */
struct xdr_netobj op_owner; /* request */
u32 op_seqid; /* request */
u32 op_share_access; /* request */
u32 op_share_deny; /* request */
+ u32 op_deleg_want; /* request */
stateid_t op_stateid; /* response */
u32 op_recall; /* recall */
struct nfsd4_change_info op_cinfo; /* response */
@@ -234,7 +248,6 @@ struct nfsd4_open {
struct nfs4_acl *op_acl;
};
#define op_iattr iattr
-#define op_verf verf
struct nfsd4_open_confirm {
stateid_t oc_req_stateid /* request */;
@@ -245,8 +258,9 @@ struct nfsd4_open_confirm {
struct nfsd4_open_downgrade {
stateid_t od_stateid;
u32 od_seqid;
- u32 od_share_access;
- u32 od_share_deny;
+ u32 od_share_access; /* request */
+ u32 od_deleg_want; /* request */
+ u32 od_share_deny; /* request */
};
@@ -343,10 +357,15 @@ struct nfsd4_saved_compoundargs {
struct page **pagelist;
};
+struct nfsd4_test_stateid_id {
+ __be32 ts_id_status;
+ stateid_t ts_id_stateid;
+ struct list_head ts_id_list;
+};
+
struct nfsd4_test_stateid {
__be32 ts_num_ids;
- struct nfsd4_compoundargs *ts_saved_args;
- struct nfsd4_saved_compoundargs ts_savedp;
+ struct list_head ts_stateid_list;
};
struct nfsd4_free_stateid {
@@ -503,7 +522,8 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
{
- return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
+ return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+ || nfsd4_is_solo_sequence(resp);
}
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
diff --git a/fs/open.c b/fs/open.c
index 77becc041149..5720854156db 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -836,7 +836,7 @@ EXPORT_SYMBOL(dentry_open);
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = files_fdtable(files);
- __FD_CLR(fd, fdt->open_fds);
+ __clear_open_fd(fd, fdt);
if (fd < files->next_fd)
files->next_fd = fd;
}
@@ -1080,7 +1080,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
if (!filp)
goto out_unlock;
rcu_assign_pointer(fdt->fd[fd], NULL);
- FD_CLR(fd, fdt->close_on_exec);
+ __clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
retval = filp_close(filp, files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index fbb53c249086..f9bd395b3473 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -550,7 +550,7 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, ' ', shared);
seq_put_decimal_ull(m, ' ', text);
seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', text);
+ seq_put_decimal_ull(m, ' ', data);
seq_put_decimal_ull(m, ' ', 0);
seq_putc(m, '\n');
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3b42c1418f31..1c8b280146d7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1753,7 +1753,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
fdt = files_fdtable(files);
f_flags = file->f_flags & ~O_CLOEXEC;
- if (FD_ISSET(fd, fdt->close_on_exec))
+ if (close_on_exec(fd, fdt))
f_flags |= O_CLOEXEC;
if (path) {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8461a7b82fdb..205c92280838 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -22,7 +22,6 @@
#include <linux/slab.h>
#include <linux/mount.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "internal.h"
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 3551f1f839eb..0d9e23a39e49 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -156,15 +156,15 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out;
- last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
- for (entry = ns_entries; entry <= last; entry++) {
+ last = &ns_entries[ARRAY_SIZE(ns_entries)];
+ for (entry = ns_entries; entry < last; entry++) {
if (strlen((*entry)->name) != len)
continue;
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
break;
}
error = ERR_PTR(-ENOENT);
- if (entry > last)
+ if (entry == last)
goto out;
error = proc_ns_instantiate(dir, dentry, task, *entry);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9694cc283511..2b9a7607cbd5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -781,12 +781,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
int err = 0;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
- if (pmd_trans_unstable(pmd))
- return 0;
-
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
- spin_lock(&walk->mm->page_table_lock);
if (pmd_trans_huge_lock(pmd, vma) == 1) {
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
@@ -802,6 +798,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return err;
}
+ if (pmd_trans_unstable(pmd))
+ return 0;
for (; addr != end; addr += PAGE_SIZE) {
/* check to see if we've left 'vma' behind
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8b4f12b33f57..d69a1d1d7e15 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1110,6 +1110,13 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number)
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
+struct dquot_warn {
+ struct super_block *w_sb;
+ qid_t w_dq_id;
+ short w_dq_type;
+ short w_type;
+};
+
static int warning_issued(struct dquot *dquot, const int warntype)
{
int flag = (warntype == QUOTA_NL_BHARDWARN ||
@@ -1125,41 +1132,42 @@ static int warning_issued(struct dquot *dquot, const int warntype)
#ifdef CONFIG_PRINT_QUOTA_WARNING
static int flag_print_warnings = 1;
-static int need_print_warning(struct dquot *dquot)
+static int need_print_warning(struct dquot_warn *warn)
{
if (!flag_print_warnings)
return 0;
- switch (dquot->dq_type) {
+ switch (warn->w_dq_type) {
case USRQUOTA:
- return current_fsuid() == dquot->dq_id;
+ return current_fsuid() == warn->w_dq_id;
case GRPQUOTA:
- return in_group_p(dquot->dq_id);
+ return in_group_p(warn->w_dq_id);
}
return 0;
}
/* Print warning to user which exceeded quota */
-static void print_warning(struct dquot *dquot, const int warntype)
+static void print_warning(struct dquot_warn *warn)
{
char *msg = NULL;
struct tty_struct *tty;
+ int warntype = warn->w_type;
if (warntype == QUOTA_NL_IHARDBELOW ||
warntype == QUOTA_NL_ISOFTBELOW ||
warntype == QUOTA_NL_BHARDBELOW ||
- warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
+ warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn))
return;
tty = get_current_tty();
if (!tty)
return;
- tty_write_message(tty, dquot->dq_sb->s_id);
+ tty_write_message(tty, warn->w_sb->s_id);
if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN)
tty_write_message(tty, ": warning, ");
else
tty_write_message(tty, ": write failed, ");
- tty_write_message(tty, quotatypes[dquot->dq_type]);
+ tty_write_message(tty, quotatypes[warn->w_dq_type]);
switch (warntype) {
case QUOTA_NL_IHARDWARN:
msg = " file limit reached.\r\n";
@@ -1185,26 +1193,34 @@ static void print_warning(struct dquot *dquot, const int warntype)
}
#endif
+static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot,
+ int warntype)
+{
+ if (warning_issued(dquot, warntype))
+ return;
+ warn->w_type = warntype;
+ warn->w_sb = dquot->dq_sb;
+ warn->w_dq_id = dquot->dq_id;
+ warn->w_dq_type = dquot->dq_type;
+}
+
/*
* Write warnings to the console and send warning messages over netlink.
*
- * Note that this function can sleep.
+ * Note that this function can call into tty and networking code.
*/
-static void flush_warnings(struct dquot *const *dquots, char *warntype)
+static void flush_warnings(struct dquot_warn *warn)
{
- struct dquot *dq;
int i;
for (i = 0; i < MAXQUOTAS; i++) {
- dq = dquots[i];
- if (dq && warntype[i] != QUOTA_NL_NOWARN &&
- !warning_issued(dq, warntype[i])) {
+ if (warn[i].w_type == QUOTA_NL_NOWARN)
+ continue;
#ifdef CONFIG_PRINT_QUOTA_WARNING
- print_warning(dq, warntype[i]);
+ print_warning(&warn[i]);
#endif
- quota_send_warning(dq->dq_type, dq->dq_id,
- dq->dq_sb->s_dev, warntype[i]);
- }
+ quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id,
+ warn[i].w_sb->s_dev, warn[i].w_type);
}
}
@@ -1218,11 +1234,11 @@ static int ignore_hardlimit(struct dquot *dquot)
}
/* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
+static int check_idq(struct dquot *dquot, qsize_t inodes,
+ struct dquot_warn *warn)
{
qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
- *warntype = QUOTA_NL_NOWARN;
if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
return 0;
@@ -1230,7 +1246,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
if (dquot->dq_dqb.dqb_ihardlimit &&
newinodes > dquot->dq_dqb.dqb_ihardlimit &&
!ignore_hardlimit(dquot)) {
- *warntype = QUOTA_NL_IHARDWARN;
+ prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN);
return -EDQUOT;
}
@@ -1239,14 +1255,14 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
dquot->dq_dqb.dqb_itime &&
get_seconds() >= dquot->dq_dqb.dqb_itime &&
!ignore_hardlimit(dquot)) {
- *warntype = QUOTA_NL_ISOFTLONGWARN;
+ prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN);
return -EDQUOT;
}
if (dquot->dq_dqb.dqb_isoftlimit &&
newinodes > dquot->dq_dqb.dqb_isoftlimit &&
dquot->dq_dqb.dqb_itime == 0) {
- *warntype = QUOTA_NL_ISOFTWARN;
+ prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN);
dquot->dq_dqb.dqb_itime = get_seconds() +
sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
}
@@ -1255,12 +1271,12 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
}
/* needs dq_data_lock */
-static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
+static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
+ struct dquot_warn *warn)
{
qsize_t tspace;
struct super_block *sb = dquot->dq_sb;
- *warntype = QUOTA_NL_NOWARN;
if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
return 0;
@@ -1272,7 +1288,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
tspace > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
- *warntype = QUOTA_NL_BHARDWARN;
+ prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
return -EDQUOT;
}
@@ -1282,7 +1298,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
get_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
- *warntype = QUOTA_NL_BSOFTLONGWARN;
+ prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
return -EDQUOT;
}
@@ -1290,7 +1306,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
tspace > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) {
- *warntype = QUOTA_NL_BSOFTWARN;
+ prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
dquot->dq_dqb.dqb_btime = get_seconds() +
sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace;
}
@@ -1543,10 +1559,9 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
int cnt, ret = 0;
- char warntype[MAXQUOTAS];
- int warn = flags & DQUOT_SPACE_WARN;
+ struct dquot_warn warn[MAXQUOTAS];
+ struct dquot **dquots = inode->i_dquot;
int reserve = flags & DQUOT_SPACE_RESERVE;
- int nofail = flags & DQUOT_SPACE_NOFAIL;
/*
* First test before acquiring mutex - solves deadlocks when we
@@ -1559,36 +1574,36 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- warntype[cnt] = QUOTA_NL_NOWARN;
+ warn[cnt].w_type = QUOTA_NL_NOWARN;
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquots[cnt])
continue;
- ret = check_bdq(inode->i_dquot[cnt], number, !warn,
- warntype+cnt);
- if (ret && !nofail) {
+ ret = check_bdq(dquots[cnt], number,
+ !(flags & DQUOT_SPACE_WARN), &warn[cnt]);
+ if (ret && !(flags & DQUOT_SPACE_NOFAIL)) {
spin_unlock(&dq_data_lock);
goto out_flush_warn;
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquots[cnt])
continue;
if (reserve)
- dquot_resv_space(inode->i_dquot[cnt], number);
+ dquot_resv_space(dquots[cnt], number);
else
- dquot_incr_space(inode->i_dquot[cnt], number);
+ dquot_incr_space(dquots[cnt], number);
}
inode_incr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
if (reserve)
goto out_flush_warn;
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquots);
out_flush_warn:
- flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ flush_warnings(warn);
out:
return ret;
}
@@ -1600,36 +1615,37 @@ EXPORT_SYMBOL(__dquot_alloc_space);
int dquot_alloc_inode(const struct inode *inode)
{
int cnt, ret = 0;
- char warntype[MAXQUOTAS];
+ struct dquot_warn warn[MAXQUOTAS];
+ struct dquot * const *dquots = inode->i_dquot;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- warntype[cnt] = QUOTA_NL_NOWARN;
+ warn[cnt].w_type = QUOTA_NL_NOWARN;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquots[cnt])
continue;
- ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
+ ret = check_idq(dquots[cnt], 1, &warn[cnt]);
if (ret)
goto warn_put_all;
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquots[cnt])
continue;
- dquot_incr_inodes(inode->i_dquot[cnt], 1);
+ dquot_incr_inodes(dquots[cnt], 1);
}
warn_put_all:
spin_unlock(&dq_data_lock);
if (ret == 0)
- mark_all_dquot_dirty(inode->i_dquot);
- flush_warnings(inode->i_dquot, warntype);
+ mark_all_dquot_dirty(dquots);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ flush_warnings(warn);
return ret;
}
EXPORT_SYMBOL(dquot_alloc_inode);
@@ -1669,7 +1685,8 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
{
unsigned int cnt;
- char warntype[MAXQUOTAS];
+ struct dquot_warn warn[MAXQUOTAS];
+ struct dquot **dquots = inode->i_dquot;
int reserve = flags & DQUOT_SPACE_RESERVE;
/* First test before acquiring mutex - solves deadlocks when we
@@ -1682,23 +1699,28 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ int wtype;
+
+ warn[cnt].w_type = QUOTA_NL_NOWARN;
+ if (!dquots[cnt])
continue;
- warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+ wtype = info_bdq_free(dquots[cnt], number);
+ if (wtype != QUOTA_NL_NOWARN)
+ prepare_warning(&warn[cnt], dquots[cnt], wtype);
if (reserve)
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ dquot_free_reserved_space(dquots[cnt], number);
else
- dquot_decr_space(inode->i_dquot[cnt], number);
+ dquot_decr_space(dquots[cnt], number);
}
inode_decr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
if (reserve)
goto out_unlock;
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquots);
out_unlock:
- flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ flush_warnings(warn);
}
EXPORT_SYMBOL(__dquot_free_space);
@@ -1708,7 +1730,8 @@ EXPORT_SYMBOL(__dquot_free_space);
void dquot_free_inode(const struct inode *inode)
{
unsigned int cnt;
- char warntype[MAXQUOTAS];
+ struct dquot_warn warn[MAXQUOTAS];
+ struct dquot * const *dquots = inode->i_dquot;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1718,15 +1741,20 @@ void dquot_free_inode(const struct inode *inode)
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ int wtype;
+
+ warn[cnt].w_type = QUOTA_NL_NOWARN;
+ if (!dquots[cnt])
continue;
- warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
- dquot_decr_inodes(inode->i_dquot[cnt], 1);
+ wtype = info_idq_free(dquots[cnt], 1);
+ if (wtype != QUOTA_NL_NOWARN)
+ prepare_warning(&warn[cnt], dquots[cnt], wtype);
+ dquot_decr_inodes(dquots[cnt], 1);
}
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(inode->i_dquot);
- flush_warnings(inode->i_dquot, warntype);
+ mark_all_dquot_dirty(dquots);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ flush_warnings(warn);
}
EXPORT_SYMBOL(dquot_free_inode);
@@ -1747,16 +1775,20 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
struct dquot *transfer_from[MAXQUOTAS] = {};
int cnt, ret = 0;
char is_valid[MAXQUOTAS] = {};
- char warntype_to[MAXQUOTAS];
- char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
+ struct dquot_warn warn_to[MAXQUOTAS];
+ struct dquot_warn warn_from_inodes[MAXQUOTAS];
+ struct dquot_warn warn_from_space[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return 0;
/* Initialize the arrays */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- warntype_to[cnt] = QUOTA_NL_NOWARN;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ warn_to[cnt].w_type = QUOTA_NL_NOWARN;
+ warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
+ warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
+ }
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1778,10 +1810,10 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
continue;
is_valid[cnt] = 1;
transfer_from[cnt] = inode->i_dquot[cnt];
- ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
+ ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
if (ret)
goto over_quota;
- ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt);
+ ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
if (ret)
goto over_quota;
}
@@ -1794,10 +1826,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
continue;
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
- warntype_from_inodes[cnt] =
- info_idq_free(transfer_from[cnt], 1);
- warntype_from_space[cnt] =
- info_bdq_free(transfer_from[cnt], space);
+ int wtype;
+ wtype = info_idq_free(transfer_from[cnt], 1);
+ if (wtype != QUOTA_NL_NOWARN)
+ prepare_warning(&warn_from_inodes[cnt],
+ transfer_from[cnt], wtype);
+ wtype = info_bdq_free(transfer_from[cnt], space);
+ if (wtype != QUOTA_NL_NOWARN)
+ prepare_warning(&warn_from_space[cnt],
+ transfer_from[cnt], wtype);
dquot_decr_inodes(transfer_from[cnt], 1);
dquot_decr_space(transfer_from[cnt], cur_space);
dquot_free_reserved_space(transfer_from[cnt],
@@ -1815,9 +1852,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
mark_all_dquot_dirty(transfer_from);
mark_all_dquot_dirty(transfer_to);
- flush_warnings(transfer_to, warntype_to);
- flush_warnings(transfer_from, warntype_from_inodes);
- flush_warnings(transfer_from, warntype_from_space);
+ flush_warnings(warn_to);
+ flush_warnings(warn_from_inodes);
+ flush_warnings(warn_from_space);
/* Pass back references to put */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (is_valid[cnt])
@@ -1826,7 +1863,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
over_quota:
spin_unlock(&dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- flush_warnings(transfer_to, warntype_to);
+ flush_warnings(warn_to);
return ret;
}
EXPORT_SYMBOL(__dquot_transfer);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index cf9f4de00a95..b1a08573fe14 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -51,7 +51,6 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
-#include <asm/system.h>
/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
diff --git a/fs/select.c b/fs/select.c
index 6fb8943d580b..17d33d09fc16 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS;
fdt = files_fdtable(current->files);
- open_fds = fdt->open_fds->fds_bits+n;
+ open_fds = fdt->open_fds + n;
max = 0;
if (set) {
set &= BITS(fds, n);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index ed0eb2a921f4..fb50652e4e11 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -83,7 +83,8 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* filesystem), otherwise the length is obtained from the first two bytes of
* the metadata block. A bit in the length field indicates if the block
* is stored uncompressed in the filesystem (usually because compression
- * generated a larger block - this does occasionally happen with zlib).
+ * generated a larger block - this does occasionally happen with compression
+ * algorithms).
*/
int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
int length, u64 *next_index, int srclength, int pages)
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 9dfe2ce0fb70..b381305c9a47 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -64,7 +64,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
* is offset by 3 because we invent "." and ".." entries which are
* not actually stored in the directory.
*/
- if (f_pos < 3)
+ if (f_pos <= 3)
return f_pos;
f_pos -= 3;
@@ -105,7 +105,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
struct inode *inode = file->f_dentry->d_inode;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
u64 block = squashfs_i(inode)->start + msblk->directory_table;
- int offset = squashfs_i(inode)->offset, length = 0, dir_count, size,
+ int offset = squashfs_i(inode)->offset, length, dir_count, size,
type, err;
unsigned int inode_number;
struct squashfs_dir_header dirh;
@@ -173,8 +173,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
dir_count = le32_to_cpu(dirh.count) + 1;
- /* dir_count should never be larger than 256 */
- if (dir_count > 256)
+ if (dir_count > SQUASHFS_DIR_COUNT)
goto failed_read;
while (dir_count--) {
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 0682b38d7e31..abcc58f3c152 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -144,7 +144,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
struct squashfs_dir_entry *dire;
u64 block = squashfs_i(dir)->start + msblk->directory_table;
int offset = squashfs_i(dir)->offset;
- int err, length = 0, dir_count, size;
+ int err, length, dir_count, size;
TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
@@ -177,8 +177,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
dir_count = le32_to_cpu(dirh.count) + 1;
- /* dir_count should never be larger than 256 */
- if (dir_count > 256)
+ if (dir_count > SQUASHFS_DIR_COUNT)
goto data_error;
while (dir_count--) {
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index e8e14645de9a..9e2349d07cb1 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -30,11 +30,6 @@
/* size of metadata (inode and directory) blocks */
#define SQUASHFS_METADATA_SIZE 8192
-#define SQUASHFS_METADATA_LOG 13
-
-/* default size of data blocks */
-#define SQUASHFS_FILE_SIZE 131072
-#define SQUASHFS_FILE_LOG 17
/* default size of block device I/O */
#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
@@ -46,12 +41,12 @@
#define SQUASHFS_FILE_MAX_SIZE 1048576
#define SQUASHFS_FILE_MAX_LOG 20
-/* Max number of uids and gids */
-#define SQUASHFS_IDS 65536
-
/* Max length of filename (not 255) */
#define SQUASHFS_NAME_LEN 256
+/* Max value for directory header count*/
+#define SQUASHFS_DIR_COUNT 256
+
#define SQUASHFS_INVALID_FRAG (0xffffffffU)
#define SQUASHFS_INVALID_XATTR (0xffffffffU)
#define SQUASHFS_INVALID_BLK (-1LL)
@@ -142,9 +137,6 @@
#define SQUASHFS_MKINODE(A, B) ((long long)(((long long) (A)\
<< 16) + (B)))
-/* Translate between VFS mode and squashfs mode */
-#define SQUASHFS_MODE(A) ((A) & 0xfff)
-
/* fragment and fragment table defines */
#define SQUASHFS_FRAGMENT_BYTES(A) \
((A) * sizeof(struct squashfs_fragment_entry))
@@ -215,11 +207,6 @@
/* cached data constants for filesystem */
#define SQUASHFS_CACHED_BLKS 8
-#define SQUASHFS_MAX_FILE_SIZE_LOG 64
-
-#define SQUASHFS_MAX_FILE_SIZE (1LL << \
- (SQUASHFS_MAX_FILE_SIZE_LOG - 2))
-
/* meta index cache */
#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
#define SQUASHFS_META_ENTRIES 127
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 970b1167e7cb..29cd014ed3a1 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -158,10 +158,15 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
+ /* Check block log for sanity */
msblk->block_log = le16_to_cpu(sblk->block_log);
if (msblk->block_log > SQUASHFS_FILE_MAX_LOG)
goto failed_mount;
+ /* Check that block_size and block_log match */
+ if (msblk->block_size != (1 << msblk->block_log))
+ goto failed_mount;
+
/* Check the root inode for sanity */
root_inode = le64_to_cpu(sblk->root_inode);
if (SQUASHFS_INODE_OFFSET(root_inode) > SQUASHFS_METADATA_SIZE)
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 987585bb0a1d..1ba2baaf4367 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -105,7 +105,6 @@ static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt)
}
static void udf_bitmap_free_blocks(struct super_block *sb,
- struct inode *inode,
struct udf_bitmap *bitmap,
struct kernel_lb_addr *bloc,
uint32_t offset,
@@ -172,7 +171,6 @@ error_return:
}
static int udf_bitmap_prealloc_blocks(struct super_block *sb,
- struct inode *inode,
struct udf_bitmap *bitmap,
uint16_t partition, uint32_t first_block,
uint32_t block_count)
@@ -223,7 +221,6 @@ out:
}
static int udf_bitmap_new_block(struct super_block *sb,
- struct inode *inode,
struct udf_bitmap *bitmap, uint16_t partition,
uint32_t goal, int *err)
{
@@ -349,7 +346,6 @@ error_return:
}
static void udf_table_free_blocks(struct super_block *sb,
- struct inode *inode,
struct inode *table,
struct kernel_lb_addr *bloc,
uint32_t offset,
@@ -581,7 +577,6 @@ error_return:
}
static int udf_table_prealloc_blocks(struct super_block *sb,
- struct inode *inode,
struct inode *table, uint16_t partition,
uint32_t first_block, uint32_t block_count)
{
@@ -643,7 +638,6 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
}
static int udf_table_new_block(struct super_block *sb,
- struct inode *inode,
struct inode *table, uint16_t partition,
uint32_t goal, int *err)
{
@@ -743,18 +737,23 @@ void udf_free_blocks(struct super_block *sb, struct inode *inode,
struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
- udf_bitmap_free_blocks(sb, inode, map->s_uspace.s_bitmap,
+ udf_bitmap_free_blocks(sb, map->s_uspace.s_bitmap,
bloc, offset, count);
} else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
- udf_table_free_blocks(sb, inode, map->s_uspace.s_table,
+ udf_table_free_blocks(sb, map->s_uspace.s_table,
bloc, offset, count);
} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
- udf_bitmap_free_blocks(sb, inode, map->s_fspace.s_bitmap,
+ udf_bitmap_free_blocks(sb, map->s_fspace.s_bitmap,
bloc, offset, count);
} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
- udf_table_free_blocks(sb, inode, map->s_fspace.s_table,
+ udf_table_free_blocks(sb, map->s_fspace.s_table,
bloc, offset, count);
}
+
+ if (inode) {
+ inode_sub_bytes(inode,
+ ((sector_t)count) << sb->s_blocksize_bits);
+ }
}
inline int udf_prealloc_blocks(struct super_block *sb,
@@ -763,29 +762,34 @@ inline int udf_prealloc_blocks(struct super_block *sb,
uint32_t block_count)
{
struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
+ sector_t allocated;
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
- return udf_bitmap_prealloc_blocks(sb, inode,
- map->s_uspace.s_bitmap,
- partition, first_block,
- block_count);
+ allocated = udf_bitmap_prealloc_blocks(sb,
+ map->s_uspace.s_bitmap,
+ partition, first_block,
+ block_count);
else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
- return udf_table_prealloc_blocks(sb, inode,
- map->s_uspace.s_table,
- partition, first_block,
- block_count);
+ allocated = udf_table_prealloc_blocks(sb,
+ map->s_uspace.s_table,
+ partition, first_block,
+ block_count);
else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
- return udf_bitmap_prealloc_blocks(sb, inode,
- map->s_fspace.s_bitmap,
- partition, first_block,
- block_count);
+ allocated = udf_bitmap_prealloc_blocks(sb,
+ map->s_fspace.s_bitmap,
+ partition, first_block,
+ block_count);
else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
- return udf_table_prealloc_blocks(sb, inode,
- map->s_fspace.s_table,
- partition, first_block,
- block_count);
+ allocated = udf_table_prealloc_blocks(sb,
+ map->s_fspace.s_table,
+ partition, first_block,
+ block_count);
else
return 0;
+
+ if (inode && allocated > 0)
+ inode_add_bytes(inode, allocated << sb->s_blocksize_bits);
+ return allocated;
}
inline int udf_new_block(struct super_block *sb,
@@ -793,25 +797,29 @@ inline int udf_new_block(struct super_block *sb,
uint16_t partition, uint32_t goal, int *err)
{
struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
+ int block;
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
- return udf_bitmap_new_block(sb, inode,
- map->s_uspace.s_bitmap,
- partition, goal, err);
+ block = udf_bitmap_new_block(sb,
+ map->s_uspace.s_bitmap,
+ partition, goal, err);
else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
- return udf_table_new_block(sb, inode,
- map->s_uspace.s_table,
- partition, goal, err);
- else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
- return udf_bitmap_new_block(sb, inode,
- map->s_fspace.s_bitmap,
+ block = udf_table_new_block(sb,
+ map->s_uspace.s_table,
partition, goal, err);
+ else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
+ block = udf_bitmap_new_block(sb,
+ map->s_fspace.s_bitmap,
+ partition, goal, err);
else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
- return udf_table_new_block(sb, inode,
- map->s_fspace.s_table,
- partition, goal, err);
+ block = udf_table_new_block(sb,
+ map->s_fspace.s_table,
+ partition, goal, err);
else {
*err = -EIO;
return 0;
}
+ if (inode && block)
+ inode_add_bytes(inode, sb->s_blocksize);
+ return block;
}
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 05ab48195be9..7e5aae4bf46f 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -116,6 +116,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
iinfo->i_lenEAttr = 0;
iinfo->i_lenAlloc = 0;
iinfo->i_use = 0;
+ iinfo->i_checkpoint = 1;
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7699df7b3198..7d7528008359 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1358,6 +1358,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
iinfo->i_unique = le64_to_cpu(fe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
+ iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr;
} else {
inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
@@ -1379,6 +1380,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
iinfo->i_unique = le64_to_cpu(efe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
+ iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
offset = sizeof(struct extendedFileEntry) +
iinfo->i_lenEAttr;
}
@@ -1495,6 +1497,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
struct buffer_head *bh = NULL;
struct fileEntry *fe;
struct extendedFileEntry *efe;
+ uint64_t lb_recorded;
uint32_t udfperms;
uint16_t icbflags;
uint16_t crclen;
@@ -1589,13 +1592,18 @@ static int udf_update_inode(struct inode *inode, int do_sync)
dsea->minorDeviceIdent = cpu_to_le32(iminor(inode));
}
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ lb_recorded = 0; /* No extents => no blocks! */
+ else
+ lb_recorded =
+ (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
+ (blocksize_bits - 9);
+
if (iinfo->i_efe == 0) {
memcpy(bh->b_data + sizeof(struct fileEntry),
iinfo->i_ext.i_data,
inode->i_sb->s_blocksize - sizeof(struct fileEntry));
- fe->logicalBlocksRecorded = cpu_to_le64(
- (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
- (blocksize_bits - 9));
+ fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
@@ -1607,6 +1615,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
fe->uniqueID = cpu_to_le64(iinfo->i_unique);
fe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr);
fe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc);
+ fe->checkpoint = cpu_to_le32(iinfo->i_checkpoint);
fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE);
crclen = sizeof(struct fileEntry);
} else {
@@ -1615,9 +1624,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
inode->i_sb->s_blocksize -
sizeof(struct extendedFileEntry));
efe->objectSize = cpu_to_le64(inode->i_size);
- efe->logicalBlocksRecorded = cpu_to_le64(
- (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
- (blocksize_bits - 9));
+ efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec ||
(iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec &&
@@ -1646,6 +1653,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
efe->uniqueID = cpu_to_le64(iinfo->i_unique);
efe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr);
efe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc);
+ efe->checkpoint = cpu_to_le32(iinfo->i_checkpoint);
efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE);
crclen = sizeof(struct extendedFileEntry);
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 85067b4c7e14..ac8a348dcb69 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -950,11 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
else
bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
- if (bitmap == NULL) {
- udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
- nr_groups);
+ if (bitmap == NULL)
return NULL;
- }
bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
bitmap->s_nr_groups = nr_groups;
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index d1bd31ea724e..bb8309dcd5c1 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -23,6 +23,7 @@ struct udf_inode_info {
__u64 i_lenExtents;
__u32 i_next_alloc_block;
__u32 i_next_alloc_goal;
+ __u32 i_checkpoint;
unsigned i_alloc_type : 3;
unsigned i_efe : 1; /* extendedFileEntry */
unsigned i_use : 1; /* unallocSpaceEntry */
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 9094e1d917be..7cdd3953d67e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -26,7 +26,6 @@
*/
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index f636f6b460d0..ac8e279eccc6 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -73,7 +73,6 @@
#include <stdarg.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index ce84ffd0264c..0f0df2759b09 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -35,6 +35,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
+struct workqueue_struct *xfs_alloc_wq;
#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
@@ -68,7 +69,7 @@ xfs_alloc_lookup_eq(
* Lookup the first record greater than or equal to [bno, len]
* in the btree given by cur.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_lookup_ge(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
* group or loop over the allocation groups to find the result.
*/
int /* error */
-xfs_alloc_vextent(
+__xfs_alloc_vextent(
xfs_alloc_arg_t *args) /* allocation argument structure */
{
xfs_agblock_t agsize; /* allocation group size */
@@ -2417,6 +2418,37 @@ error0:
return error;
}
+static void
+xfs_alloc_vextent_worker(
+ struct work_struct *work)
+{
+ struct xfs_alloc_arg *args = container_of(work,
+ struct xfs_alloc_arg, work);
+ unsigned long pflags;
+
+ /* we are in a transaction context here */
+ current_set_flags_nested(&pflags, PF_FSTRANS);
+
+ args->result = __xfs_alloc_vextent(args);
+ complete(args->done);
+
+ current_restore_flags_nested(&pflags, PF_FSTRANS);
+}
+
+
+int /* error */
+xfs_alloc_vextent(
+ xfs_alloc_arg_t *args) /* allocation argument structure */
+{
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ args->done = &done;
+ INIT_WORK(&args->work, xfs_alloc_vextent_worker);
+ queue_work(xfs_alloc_wq, &args->work);
+ wait_for_completion(&done);
+ return args->result;
+}
+
/*
* Free an extent.
* Just break up the extent address and hand off to xfs_free_ag_extent
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2f52b924be79..3a7e7d8f8ded 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -25,6 +25,8 @@ struct xfs_perag;
struct xfs_trans;
struct xfs_busy_extent;
+extern struct workqueue_struct *xfs_alloc_wq;
+
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
@@ -119,6 +121,9 @@ typedef struct xfs_alloc_arg {
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */
+ struct completion *done;
+ struct work_struct work;
+ int result;
} xfs_alloc_arg_t;
/*
@@ -243,6 +248,13 @@ xfs_alloc_lookup_le(
xfs_extlen_t len, /* length of extent */
int *stat); /* success/failure */
+int /* error */
+xfs_alloc_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat); /* success/failure */
+
int /* error */
xfs_alloc_get_rec(
struct xfs_btree_cur *cur, /* btree cursor */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 08b9ac644c31..65d61b948ead 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -853,6 +853,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
{
int newsize, forkoff, retval;
+ trace_xfs_attr_sf_addname(args);
+
retval = xfs_attr_shortform_lookup(args);
if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
return(retval);
@@ -896,6 +898,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_dabuf_t *bp;
int retval, error, committed, forkoff;
+ trace_xfs_attr_leaf_addname(args);
+
/*
* Read the (only) block in the attribute list in.
*/
@@ -920,6 +924,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_da_brelse(args->trans, bp);
return(retval);
}
+
+ trace_xfs_attr_leaf_replace(args);
+
args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
@@ -1090,6 +1097,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
xfs_dabuf_t *bp;
int error, committed, forkoff;
+ trace_xfs_attr_leaf_removename(args);
+
/*
* Remove the attribute.
*/
@@ -1223,6 +1232,8 @@ xfs_attr_node_addname(xfs_da_args_t *args)
xfs_mount_t *mp;
int committed, retval, error;
+ trace_xfs_attr_node_addname(args);
+
/*
* Fill in bucket of arguments/results/context to carry around.
*/
@@ -1249,6 +1260,9 @@ restart:
} else if (retval == EEXIST) {
if (args->flags & ATTR_CREATE)
goto out;
+
+ trace_xfs_attr_node_replace(args);
+
args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
@@ -1480,6 +1494,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_dabuf_t *bp;
int retval, error, committed, forkoff;
+ trace_xfs_attr_node_removename(args);
+
/*
* Tie a string around our finger to remind us where we are.
*/
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d25eafd4d28d..76d93dc953e1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -235,6 +235,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
xfs_inode_t *dp;
xfs_ifork_t *ifp;
+ trace_xfs_attr_sf_create(args);
+
dp = args->dp;
ASSERT(dp != NULL);
ifp = dp->i_afp;
@@ -268,6 +270,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
xfs_inode_t *dp;
xfs_ifork_t *ifp;
+ trace_xfs_attr_sf_add(args);
+
dp = args->dp;
mp = dp->i_mount;
dp->i_d.di_forkoff = forkoff;
@@ -337,6 +341,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
xfs_mount_t *mp;
xfs_inode_t *dp;
+ trace_xfs_attr_sf_remove(args);
+
dp = args->dp;
mp = dp->i_mount;
base = sizeof(xfs_attr_sf_hdr_t);
@@ -405,6 +411,8 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
int i;
xfs_ifork_t *ifp;
+ trace_xfs_attr_sf_lookup(args);
+
ifp = args->dp->i_afp;
ASSERT(ifp->if_flags & XFS_IFINLINE);
sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
@@ -476,6 +484,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
xfs_dabuf_t *bp;
xfs_ifork_t *ifp;
+ trace_xfs_attr_sf_to_leaf(args);
+
dp = args->dp;
ifp = dp->i_afp;
sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
@@ -775,6 +785,8 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
char *tmpbuffer;
int error, i;
+ trace_xfs_attr_leaf_to_sf(args);
+
dp = args->dp;
tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
ASSERT(tmpbuffer != NULL);
@@ -848,6 +860,8 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
xfs_dablk_t blkno;
int error;
+ trace_xfs_attr_leaf_to_node(args);
+
dp = args->dp;
bp1 = bp2 = NULL;
error = xfs_da_grow_inode(args, &blkno);
@@ -911,6 +925,8 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
xfs_dabuf_t *bp;
int error;
+ trace_xfs_attr_leaf_create(args);
+
dp = args->dp;
ASSERT(dp != NULL);
error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
@@ -948,6 +964,8 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
xfs_dablk_t blkno;
int error;
+ trace_xfs_attr_leaf_split(state->args);
+
/*
* Allocate space for a new leaf node.
*/
@@ -977,10 +995,13 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
*
* Insert the "new" entry in the correct block.
*/
- if (state->inleaf)
+ if (state->inleaf) {
+ trace_xfs_attr_leaf_add_old(state->args);
error = xfs_attr_leaf_add(oldblk->bp, state->args);
- else
+ } else {
+ trace_xfs_attr_leaf_add_new(state->args);
error = xfs_attr_leaf_add(newblk->bp, state->args);
+ }
/*
* Update last hashval in each block since we added the name.
@@ -1001,6 +1022,8 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
xfs_attr_leaf_map_t *map;
int tablesize, entsize, sum, tmp, i;
+ trace_xfs_attr_leaf_add(args);
+
leaf = bp->data;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT((args->index >= 0)
@@ -1128,8 +1151,6 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
(be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
/*
- * Copy the attribute name and value into the new space.
- *
* For "remote" attribute values, simply note that we need to
* allocate space for the "remote" value. We can't actually
* allocate the extents in this transaction, and we can't decide
@@ -1265,6 +1286,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
args = state->args;
+ trace_xfs_attr_leaf_rebalance(args);
+
/*
* Check ordering of blocks, reverse if it makes things simpler.
*
@@ -1810,6 +1833,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
xfs_mount_t *mp;
char *tmpbuffer;
+ trace_xfs_attr_leaf_unbalance(state->args);
+
/*
* Set up environment.
*/
@@ -1919,6 +1944,8 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
int probe, span;
xfs_dahash_t hashval;
+ trace_xfs_attr_leaf_lookup(args);
+
leaf = bp->data;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
@@ -2445,6 +2472,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
char *name;
#endif /* DEBUG */
+ trace_xfs_attr_leaf_clearflag(args);
/*
* Set up the operation.
*/
@@ -2509,6 +2537,8 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
xfs_dabuf_t *bp;
int error;
+ trace_xfs_attr_leaf_setflag(args);
+
/*
* Set up the operation.
*/
@@ -2565,6 +2595,8 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
char *name1, *name2;
#endif /* DEBUG */
+ trace_xfs_attr_leaf_flipflags(args);
+
/*
* Read the block containing the "old" attr
*/
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3548c6f75593..85e7e327bcd8 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5124,6 +5124,15 @@ xfs_bunmapi(
cur->bc_private.b.flags = 0;
} else
cur = NULL;
+
+ if (isrt) {
+ /*
+ * Synchronize by locking the bitmap inode.
+ */
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+ }
+
extno = 0;
while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
(nexts == 0 || extno < nexts)) {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index df7ffb0affe7..5bf3be45f543 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -21,7 +21,6 @@
#include <linux/list.h>
#include <linux/types.h>
#include <linux/spinlock.h>
-#include <asm/system.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 77c74257c2a3..7f1a6f5b05a6 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -108,6 +108,8 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
int error;
xfs_trans_t *tp;
+ trace_xfs_da_node_create(args);
+
tp = args->trans;
error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
if (error)
@@ -140,6 +142,8 @@ xfs_da_split(xfs_da_state_t *state)
xfs_dabuf_t *bp;
int max, action, error, i;
+ trace_xfs_da_split(state->args);
+
/*
* Walk back up the tree splitting/inserting/adjusting as necessary.
* If we need to insert and there isn't room, split the node, then
@@ -178,10 +182,12 @@ xfs_da_split(xfs_da_state_t *state)
state->extravalid = 1;
if (state->inleaf) {
state->extraafter = 0; /* before newblk */
+ trace_xfs_attr_leaf_split_before(state->args);
error = xfs_attr_leaf_split(state, oldblk,
&state->extrablk);
} else {
state->extraafter = 1; /* after newblk */
+ trace_xfs_attr_leaf_split_after(state->args);
error = xfs_attr_leaf_split(state, newblk,
&state->extrablk);
}
@@ -300,6 +306,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
xfs_mount_t *mp;
xfs_dir2_leaf_t *leaf;
+ trace_xfs_da_root_split(state->args);
+
/*
* Copy the existing (incorrect) block from the root node position
* to a free space somewhere.
@@ -380,6 +388,8 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
int newcount, error;
int useextra;
+ trace_xfs_da_node_split(state->args);
+
node = oldblk->bp->data;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
@@ -466,6 +476,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
int count, tmp;
xfs_trans_t *tp;
+ trace_xfs_da_node_rebalance(state->args);
+
node1 = blk1->bp->data;
node2 = blk2->bp->data;
/*
@@ -574,6 +586,8 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
xfs_da_node_entry_t *btree;
int tmp;
+ trace_xfs_da_node_add(state->args);
+
node = oldblk->bp->data;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
@@ -619,6 +633,8 @@ xfs_da_join(xfs_da_state_t *state)
xfs_da_state_blk_t *drop_blk, *save_blk;
int action, error;
+ trace_xfs_da_join(state->args);
+
action = 0;
drop_blk = &state->path.blk[ state->path.active-1 ];
save_blk = &state->altpath.blk[ state->path.active-1 ];
@@ -723,6 +739,8 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
xfs_dabuf_t *bp;
int error;
+ trace_xfs_da_root_join(state->args);
+
args = state->args;
ASSERT(args != NULL);
ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
@@ -941,6 +959,8 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
xfs_da_node_entry_t *btree;
int tmp;
+ trace_xfs_da_node_remove(state->args);
+
node = drop_blk->bp->data;
ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
ASSERT(drop_blk->index >= 0);
@@ -984,6 +1004,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
int tmp;
xfs_trans_t *tp;
+ trace_xfs_da_node_unbalance(state->args);
+
drop_node = drop_blk->bp->data;
save_node = save_blk->bp->data;
ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
@@ -1230,6 +1252,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
/*
* Link new block in before existing block.
*/
+ trace_xfs_da_link_before(args);
new_info->forw = cpu_to_be32(old_blk->blkno);
new_info->back = old_info->back;
if (old_info->back) {
@@ -1251,6 +1274,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
/*
* Link new block in after existing block.
*/
+ trace_xfs_da_link_after(args);
new_info->forw = old_info->forw;
new_info->back = cpu_to_be32(old_blk->blkno);
if (old_info->forw) {
@@ -1348,6 +1372,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
* Unlink the leaf block from the doubly linked chain of leaves.
*/
if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
+ trace_xfs_da_unlink_back(args);
save_info->back = drop_info->back;
if (drop_info->back) {
error = xfs_da_read_buf(args->trans, args->dp,
@@ -1365,6 +1390,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
xfs_da_buf_done(bp);
}
} else {
+ trace_xfs_da_unlink_forward(args);
save_info->forw = drop_info->forw;
if (drop_info->forw) {
error = xfs_da_read_buf(args->trans, args->dp,
@@ -1652,6 +1678,8 @@ xfs_da_grow_inode(
int count;
int error;
+ trace_xfs_da_grow_inode(args);
+
if (args->whichfork == XFS_DATA_FORK) {
bno = args->dp->i_mount->m_dirleafblk;
count = args->dp->i_mount->m_dirblkfsbs;
@@ -1690,6 +1718,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
xfs_dir2_leaf_t *dead_leaf2;
xfs_dahash_t dead_hash;
+ trace_xfs_da_swap_lastblock(args);
+
dead_buf = *dead_bufp;
dead_blkno = *dead_blknop;
tp = args->trans;
@@ -1878,6 +1908,8 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
xfs_trans_t *tp;
xfs_mount_t *mp;
+ trace_xfs_da_shrink_inode(args);
+
dp = args->dp;
w = args->whichfork;
tp = args->trans;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 286a051f12cf..1ad3a4b8ca40 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -37,9 +37,9 @@ STATIC int
xfs_trim_extents(
struct xfs_mount *mp,
xfs_agnumber_t agno,
- xfs_fsblock_t start,
- xfs_fsblock_t end,
- xfs_fsblock_t minlen,
+ xfs_daddr_t start,
+ xfs_daddr_t end,
+ xfs_daddr_t minlen,
__uint64_t *blocks_trimmed)
{
struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
@@ -67,7 +67,7 @@ xfs_trim_extents(
/*
* Look up the longest btree in the AGF and start with it.
*/
- error = xfs_alloc_lookup_le(cur, 0,
+ error = xfs_alloc_lookup_ge(cur, 0,
be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
if (error)
goto out_del_cursor;
@@ -77,8 +77,10 @@ xfs_trim_extents(
* enough to be worth discarding.
*/
while (i) {
- xfs_agblock_t fbno;
- xfs_extlen_t flen;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ xfs_daddr_t dbno;
+ xfs_extlen_t dlen;
error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
if (error)
@@ -87,9 +89,17 @@ xfs_trim_extents(
ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
/*
+ * use daddr format for all range/len calculations as that is
+ * the format the range/len variables are supplied in by
+ * userspace.
+ */
+ dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
+ dlen = XFS_FSB_TO_BB(mp, flen);
+
+ /*
* Too small? Give up.
*/
- if (flen < minlen) {
+ if (dlen < minlen) {
trace_xfs_discard_toosmall(mp, agno, fbno, flen);
goto out_del_cursor;
}
@@ -99,8 +109,7 @@ xfs_trim_extents(
* supposed to discard skip it. Do not bother to trim
* down partially overlapping ranges for now.
*/
- if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
- XFS_AGB_TO_FSB(mp, agno, fbno) > end) {
+ if (dbno + dlen < start || dbno > end) {
trace_xfs_discard_exclude(mp, agno, fbno, flen);
goto next_extent;
}
@@ -115,10 +124,7 @@ xfs_trim_extents(
}
trace_xfs_discard_extent(mp, agno, fbno, flen);
- error = -blkdev_issue_discard(bdev,
- XFS_AGB_TO_DADDR(mp, agno, fbno),
- XFS_FSB_TO_BB(mp, flen),
- GFP_NOFS, 0);
+ error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
if (error)
goto out_del_cursor;
*blocks_trimmed += flen;
@@ -137,6 +143,15 @@ out_put_perag:
return error;
}
+/*
+ * trim a range of the filesystem.
+ *
+ * Note: the parameters passed from userspace are byte ranges into the
+ * filesystem which does not match to the format we use for filesystem block
+ * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
+ * is a linear address range. Hence we need to use DADDR based conversions and
+ * comparisons for determining the correct offset and regions to trim.
+ */
int
xfs_ioc_trim(
struct xfs_mount *mp,
@@ -145,7 +160,7 @@ xfs_ioc_trim(
struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
unsigned int granularity = q->limits.discard_granularity;
struct fstrim_range range;
- xfs_fsblock_t start, end, minlen;
+ xfs_daddr_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
__uint64_t blocks_trimmed = 0;
int error, last_error = 0;
@@ -159,22 +174,22 @@ xfs_ioc_trim(
/*
* Truncating down the len isn't actually quite correct, but using
- * XFS_B_TO_FSB would mean we trivially get overflows for values
+ * BBTOB would mean we trivially get overflows for values
* of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
* used by the fstrim application. In the end it really doesn't
* matter as trimming blocks is an advisory interface.
*/
- start = XFS_B_TO_FSBT(mp, range.start);
- end = start + XFS_B_TO_FSBT(mp, range.len) - 1;
- minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+ start = BTOBB(range.start);
+ end = start + BTOBBT(range.len) - 1;
+ minlen = BTOBB(max_t(u64, granularity, range.minlen));
- if (start >= mp->m_sb.sb_dblocks)
+ if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
return -XFS_ERROR(EINVAL);
- if (end > mp->m_sb.sb_dblocks - 1)
- end = mp->m_sb.sb_dblocks - 1;
+ if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
+ end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
- start_agno = XFS_FSB_TO_AGNO(mp, start);
- end_agno = XFS_FSB_TO_AGNO(mp, end);
+ start_agno = xfs_daddr_to_agno(mp, start);
+ end_agno = xfs_daddr_to_agno(mp, end);
for (agno = start_agno; agno <= end_agno; agno++) {
error = -xfs_trim_extents(mp, agno, start, end, minlen,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 4be16a0cbe5a..1155208fa830 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1065,7 +1065,7 @@ out:
return -ENOMEM;
}
-void __exit
+void
xfs_qm_exit(void)
{
kmem_zone_destroy(xfs_qm_dqtrxzone);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index a98cb4524e6c..bcc6c249b2c7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -289,7 +289,7 @@ xfs_iget_cache_hit(
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
- xfs_iflags_clear(ip, XFS_ISTALE);
+ xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
XFS_STATS_INC(xs_ig_found);
return 0;
@@ -314,6 +314,7 @@ xfs_iget_cache_miss(
struct xfs_inode *ip;
int error;
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
+ int iflags;
ip = xfs_inode_alloc(mp, ino);
if (!ip)
@@ -358,8 +359,11 @@ xfs_iget_cache_miss(
* memory barrier that ensures this detection works correctly at lookup
* time.
*/
+ iflags = XFS_INEW;
+ if (flags & XFS_IGET_DONTCACHE)
+ iflags |= XFS_IDONTCACHE;
ip->i_udquot = ip->i_gdquot = NULL;
- xfs_iflags_set(ip, XFS_INEW);
+ xfs_iflags_set(ip, iflags);
/* insert the new inode */
spin_lock(&pag->pag_ici_lock);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f123dbe6d42a..7fee3387e1c8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -387,10 +387,11 @@ xfs_set_projid(struct xfs_inode *ip,
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
+#define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */
/*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during
- * inode lookup. Thi prevents unintended behaviour on the new inode from
+ * inode lookup. This prevents unintended behaviour on the new inode from
* ocurring.
*/
#define XFS_IRECLAIM_RESET_FLAGS \
@@ -553,6 +554,7 @@ do { \
*/
#define XFS_IGET_CREATE 0x1
#define XFS_IGET_UNTRUSTED 0x2
+#define XFS_IGET_DONTCACHE 0x4
int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
xfs_ino_t, struct xfs_dinode **,
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f588320dc4b9..91f8ff547ab3 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -209,6 +209,7 @@ xfs_open_by_handle(
struct file *filp;
struct inode *inode;
struct dentry *dentry;
+ fmode_t fmode;
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
@@ -228,26 +229,21 @@ xfs_open_by_handle(
hreq->oflags |= O_LARGEFILE;
#endif
- /* Put open permission in namei format. */
permflag = hreq->oflags;
- if ((permflag+1) & O_ACCMODE)
- permflag++;
- if (permflag & O_TRUNC)
- permflag |= 2;
-
+ fmode = OPEN_FMODE(permflag);
if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
- (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+ (fmode & FMODE_WRITE) && IS_APPEND(inode)) {
error = -XFS_ERROR(EPERM);
goto out_dput;
}
- if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+ if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
error = -XFS_ERROR(EACCES);
goto out_dput;
}
/* Can't write directories. */
- if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+ if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
error = -XFS_ERROR(EISDIR);
goto out_dput;
}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 9720c54bbed0..acc2bf264dab 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -75,7 +75,8 @@ xfs_bulkstat_one_int(
return XFS_ERROR(ENOMEM);
error = xfs_iget(mp, NULL, ino,
- XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
+ (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
+ XFS_ILOCK_SHARED, &ip);
if (error) {
*stat = BULKSTAT_RV_NOTHING;
goto out_free;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 98a9cb5ffd17..6db1fef38bff 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -726,8 +726,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
.lv_iovecp = &reg,
};
- /* remove inited flag */
+ /* remove inited flag, and account for space used */
tic->t_flags = 0;
+ tic->t_curr_res -= sizeof(magic);
error = xlog_write(log, &vec, tic, &lsn,
NULL, XLOG_UNMOUNT_TRANS);
/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7c75c7374d5a..8ecad5bad66c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3161,37 +3161,26 @@ xlog_recover_process_iunlinks(
*/
continue;
}
+ /*
+ * Unlock the buffer so that it can be acquired in the normal
+ * course of the transaction to truncate and free each inode.
+ * Because we are not racing with anyone else here for the AGI
+ * buffer, we don't even need to hold it locked to read the
+ * initial unlinked bucket entries out of the buffer. We keep
+ * buffer reference though, so that it stays pinned in memory
+ * while we need the buffer.
+ */
agi = XFS_BUF_TO_AGI(agibp);
+ xfs_buf_unlock(agibp);
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
- /*
- * Release the agi buffer so that it can
- * be acquired in the normal course of the
- * transaction to truncate and free the inode.
- */
- xfs_buf_relse(agibp);
-
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
-
- /*
- * Reacquire the agibuffer and continue around
- * the loop. This should never fail as we know
- * the buffer was good earlier on.
- */
- error = xfs_read_agi(mp, NULL, agno, &agibp);
- ASSERT(error == 0);
- agi = XFS_BUF_TO_AGI(agibp);
}
}
-
- /*
- * Release the buffer for the current agi so we can
- * go on to the next one.
- */
- xfs_buf_relse(agibp);
+ xfs_buf_rele(agibp);
}
mp->m_dmevmask = mp_dmevmask;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 87323f1ded64..ca4f31534a0a 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -183,6 +183,7 @@ error_cancel:
oblocks = map.br_startoff + map.br_blockcount;
}
return 0;
+
error:
return error;
}
@@ -2139,11 +2140,9 @@ xfs_rtfree_extent(
xfs_buf_t *sumbp; /* summary file block buffer */
mp = tp->t_mountp;
- /*
- * Synchronize by locking the bitmap inode.
- */
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+
+ ASSERT(mp->m_rbmip->i_itemp != NULL);
+ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
#if defined(__KERNEL__) && defined(DEBUG)
/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 912442cf0f82..dab9a5f6dfd6 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -950,6 +950,22 @@ xfs_fs_evict_inode(
xfs_inactive(ip);
}
+/*
+ * We do an unlocked check for XFS_IDONTCACHE here because we are already
+ * serialised against cache hits here via the inode->i_lock and igrab() in
+ * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
+ * racing with us, and it avoids needing to grab a spinlock here for every inode
+ * we drop the final reference on.
+ */
+STATIC int
+xfs_fs_drop_inode(
+ struct inode *inode)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
+}
+
STATIC void
xfs_free_fsname(
struct xfs_mount *mp)
@@ -1433,6 +1449,7 @@ static const struct super_operations xfs_super_operations = {
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.evict_inode = xfs_fs_evict_inode,
+ .drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
@@ -1606,12 +1623,28 @@ xfs_init_workqueues(void)
xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
if (!xfs_syncd_wq)
return -ENOMEM;
+
+ /*
+ * The allocation workqueue can be used in memory reclaim situations
+ * (writepage path), and parallelism is only limited by the number of
+ * AGs in all the filesystems mounted. Hence use the default large
+ * max_active value for this workqueue.
+ */
+ xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
+ if (!xfs_alloc_wq)
+ goto out_destroy_syncd;
+
return 0;
+
+out_destroy_syncd:
+ destroy_workqueue(xfs_syncd_wq);
+ return -ENOMEM;
}
STATIC void
xfs_destroy_workqueues(void)
{
+ destroy_workqueue(xfs_alloc_wq);
destroy_workqueue(xfs_syncd_wq);
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 75eb54af4d58..06838c42b2a0 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -627,16 +627,19 @@ DECLARE_EVENT_CLASS(xfs_namespace_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, dp_ino)
+ __field(int, namelen)
__dynamic_array(char, name, name->len)
),
TP_fast_assign(
__entry->dev = VFS_I(dp)->i_sb->s_dev;
__entry->dp_ino = dp->i_ino;
+ __entry->namelen = name->len;
memcpy(__get_str(name), name->name, name->len);
),
- TP_printk("dev %d:%d dp ino 0x%llx name %s",
+ TP_printk("dev %d:%d dp ino 0x%llx name %.*s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->dp_ino,
+ __entry->namelen,
__get_str(name))
)
@@ -658,6 +661,8 @@ TRACE_EVENT(xfs_rename,
__field(dev_t, dev)
__field(xfs_ino_t, src_dp_ino)
__field(xfs_ino_t, target_dp_ino)
+ __field(int, src_namelen)
+ __field(int, target_namelen)
__dynamic_array(char, src_name, src_name->len)
__dynamic_array(char, target_name, target_name->len)
),
@@ -665,15 +670,20 @@ TRACE_EVENT(xfs_rename,
__entry->dev = VFS_I(src_dp)->i_sb->s_dev;
__entry->src_dp_ino = src_dp->i_ino;
__entry->target_dp_ino = target_dp->i_ino;
+ __entry->src_namelen = src_name->len;
+ __entry->target_namelen = target_name->len;
memcpy(__get_str(src_name), src_name->name, src_name->len);
- memcpy(__get_str(target_name), target_name->name, target_name->len);
+ memcpy(__get_str(target_name), target_name->name,
+ target_name->len);
),
TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
- " src name %s target name %s",
+ " src name %.*s target name %.*s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->src_dp_ino,
__entry->target_dp_ino,
+ __entry->src_namelen,
__get_str(src_name),
+ __entry->target_namelen,
__get_str(target_name))
)
@@ -1408,7 +1418,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
-DECLARE_EVENT_CLASS(xfs_dir2_class,
+DECLARE_EVENT_CLASS(xfs_da_class,
TP_PROTO(struct xfs_da_args *args),
TP_ARGS(args),
TP_STRUCT__entry(
@@ -1443,7 +1453,7 @@ DECLARE_EVENT_CLASS(xfs_dir2_class,
)
#define DEFINE_DIR2_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_class, name, \
+DEFINE_EVENT(xfs_da_class, name, \
TP_PROTO(struct xfs_da_args *args), \
TP_ARGS(args))
DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
@@ -1472,6 +1482,64 @@ DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+#define DEFINE_ATTR_EVENT(name) \
+DEFINE_EVENT(xfs_da_class, name, \
+ TP_PROTO(struct xfs_da_args *args), \
+ TP_ARGS(args))
+DEFINE_ATTR_EVENT(xfs_attr_sf_add);
+DEFINE_ATTR_EVENT(xfs_attr_sf_addname);
+DEFINE_ATTR_EVENT(xfs_attr_sf_create);
+DEFINE_ATTR_EVENT(xfs_attr_sf_lookup);
+DEFINE_ATTR_EVENT(xfs_attr_sf_remove);
+DEFINE_ATTR_EVENT(xfs_attr_sf_removename);
+DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf);
+
+DEFINE_ATTR_EVENT(xfs_attr_leaf_add);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_addname);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_create);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_replace);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_removename);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_split);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_split_after);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_clearflag);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_setflag);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_flipflags);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance);
+DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance);
+
+DEFINE_ATTR_EVENT(xfs_attr_node_addname);
+DEFINE_ATTR_EVENT(xfs_attr_node_lookup);
+DEFINE_ATTR_EVENT(xfs_attr_node_replace);
+DEFINE_ATTR_EVENT(xfs_attr_node_removename);
+
+#define DEFINE_DA_EVENT(name) \
+DEFINE_EVENT(xfs_da_class, name, \
+ TP_PROTO(struct xfs_da_args *args), \
+ TP_ARGS(args))
+DEFINE_DA_EVENT(xfs_da_split);
+DEFINE_DA_EVENT(xfs_da_join);
+DEFINE_DA_EVENT(xfs_da_link_before);
+DEFINE_DA_EVENT(xfs_da_link_after);
+DEFINE_DA_EVENT(xfs_da_unlink_back);
+DEFINE_DA_EVENT(xfs_da_unlink_forward);
+DEFINE_DA_EVENT(xfs_da_root_split);
+DEFINE_DA_EVENT(xfs_da_root_join);
+DEFINE_DA_EVENT(xfs_da_node_add);
+DEFINE_DA_EVENT(xfs_da_node_create);
+DEFINE_DA_EVENT(xfs_da_node_split);
+DEFINE_DA_EVENT(xfs_da_node_remove);
+DEFINE_DA_EVENT(xfs_da_node_rebalance);
+DEFINE_DA_EVENT(xfs_da_node_unbalance);
+DEFINE_DA_EVENT(xfs_da_swap_lastblock);
+DEFINE_DA_EVENT(xfs_da_grow_inode);
+DEFINE_DA_EVENT(xfs_da_shrink_inode);
+
DECLARE_EVENT_CLASS(xfs_dir2_space_class,
TP_PROTO(struct xfs_da_args *args, int idx),
TP_ARGS(args, idx),