diff options
author | David Woodhouse <dwmw2@infradead.org> | 2006-06-26 17:35:44 +0200 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2006-06-26 17:35:44 +0200 |
commit | 62ed948cb1405fe95d61d8c6445c102e0c9da0a6 (patch) | |
tree | f139adcc861a05e7cc09cdb387a271a652fc2d07 /fs | |
parent | [MTD] Initialize 'writesize' (diff) | |
parent | [PATCH] uclinux: use PER_LINUX_32BIT in binfmt_flat (diff) | |
download | linux-62ed948cb1405fe95d61d8c6445c102e0c9da0a6.tar.xz linux-62ed948cb1405fe95d61d8c6445c102e0c9da0a6.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
284 files changed, 9122 insertions, 10785 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 2cb87ba4b1c1..5c6bdf82146c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -530,9 +530,6 @@ error: if (vfid) v9fs_fid_destroy(vfid); - if (inode) - iput(inode); - return err; } @@ -1054,6 +1051,9 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, int ret; char *link = __getname(); + if (unlikely(!link)) + return -ENOMEM; + if (buflen > PATH_MAX) buflen = PATH_MAX; @@ -1171,9 +1171,6 @@ error: if (vfid) v9fs_fid_destroy(vfid); - if (inode) - iput(inode); - return err; } @@ -1227,6 +1224,9 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, } name = __getname(); + if (unlikely(!name)) + return -ENOMEM; + sprintf(name, "%d\n", oldfid->fid); retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); __putname(name); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 61c599b4a1e3..8b15bb22caca 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -99,12 +99,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, * @flags: mount flags * @dev_name: device name that was mounted * @data: mount options + * @mnt: mountpoint record to be instantiated * */ -static struct super_block *v9fs_get_sb(struct file_system_type - *fs_type, int flags, - const char *dev_name, void *data) +static int v9fs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + struct vfsmount *mnt) { struct super_block *sb = NULL; struct v9fs_fcall *fcall = NULL; @@ -123,17 +124,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) - return ERR_PTR(-ENOMEM); + return -ENOMEM; if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - sb = ERR_PTR(newfid); + retval = newfid; goto out_free_session; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); - if (IS_ERR(sb)) + if (IS_ERR(sb)) { + retval = PTR_ERR(sb); goto out_close_session; + } v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); @@ -184,19 +187,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type goto put_back_sb; } - return sb; + return simple_set_mnt(mnt, sb); out_close_session: v9fs_session_close(v9ses); out_free_session: kfree(v9ses); - return sb; + return retval; put_back_sb: /* deactivate_super calls v9fs_kill_super which will frees the rest */ up_write(&sb->s_umount); deactivate_super(sb); - return ERR_PTR(retval); + return retval; } /** @@ -253,11 +256,12 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) } static void -v9fs_umount_begin(struct super_block *sb) +v9fs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct v9fs_session_info *v9ses = sb->s_fs_info; + struct v9fs_session_info *v9ses = vfsmnt->mnt_sb->s_fs_info; - v9fs_session_cancel(v9ses); + if (flags & MNT_FORCE) + v9fs_session_cancel(v9ses); } static struct super_operations v9fs_super_ops = { diff --git a/fs/Kconfig b/fs/Kconfig index 20f9b557732d..1cdc043922d5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -53,7 +53,7 @@ config EXT2_FS_SECURITY config EXT2_FS_XIP bool "Ext2 execute in place support" - depends on EXT2_FS + depends on EXT2_FS && MMU help Execute in place can be used on memory-backed block devices. If you enable this option, you can select to mount block devices which are @@ -776,7 +776,8 @@ endmenu menu "Pseudo filesystems" config PROC_FS - bool "/proc file system support" + bool "/proc file system support" if EMBEDDED + default y help This is a virtual file system providing information about the status of the system. "Virtual" means that it doesn't take up any space on @@ -1370,11 +1371,19 @@ config UFS_FS config UFS_FS_WRITE bool "UFS file system write support (DANGEROUS)" - depends on UFS_FS && EXPERIMENTAL && BROKEN + depends on UFS_FS && EXPERIMENTAL help Say Y here if you want to try writing to UFS partitions. This is experimental, so you should back up your UFS partitions beforehand. +config UFS_DEBUG + bool "UFS debugging" + depends on UFS_FS + help + If you are experiencing any problems with the UFS filesystem, say + Y here. This will result in _many_ additional debugging messages to be + written to the system log. + endmenu menu "Network File Systems" diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 252abda0d200..ba1c88af49fe 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -196,17 +196,17 @@ static int adfs_remount(struct super_block *sb, int *flags, char *data) return parse_options(sb, data); } -static int adfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct adfs_sb_info *asb = ADFS_SB(sb); + struct adfs_sb_info *asb = ADFS_SB(dentry->d_sb); buf->f_type = ADFS_SUPER_MAGIC; buf->f_namelen = asb->s_namelen; - buf->f_bsize = sb->s_blocksize; + buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_blocks = asb->s_size; buf->f_files = asb->s_ids_per_zone * asb->s_map_size; buf->f_bavail = - buf->f_bfree = adfs_map_free(sb); + buf->f_bfree = adfs_map_free(dentry->d_sb); buf->f_ffree = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks; return 0; @@ -470,10 +470,11 @@ error: return -EINVAL; } -static struct super_block *adfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int adfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, + mnt); } static struct file_system_type adfs_fs_type = { diff --git a/fs/affs/super.c b/fs/affs/super.c index 4d7e5b19e5cd..5200f4938df0 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -18,7 +18,7 @@ extern struct timezone sys_tz; -static int affs_statfs(struct super_block *sb, struct kstatfs *buf); +static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_remount (struct super_block *sb, int *flags, char *data); static void @@ -271,6 +271,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) int reserved; unsigned long mount_flags; int tmp_flags; /* fix remount prototype... */ + u8 sig[4]; pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options"); @@ -370,8 +371,9 @@ got_root: printk(KERN_ERR "AFFS: Cannot read boot block\n"); goto out_error; } - chksum = be32_to_cpu(*(__be32 *)boot_bh->b_data); + memcpy(sig, boot_bh->b_data, 4); brelse(boot_bh); + chksum = be32_to_cpu(*(__be32 *)sig); /* Dircache filesystems are compatible with non-dircache ones * when reading. As long as they aren't supported, writing is @@ -420,11 +422,11 @@ got_root: } if (mount_flags & SF_VERBOSE) { - chksum = cpu_to_be32(chksum); - printk(KERN_NOTICE "AFFS: Mounting volume \"%*s\": Type=%.3s\\%c, Blocksize=%d\n", - AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0], + u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0]; + printk(KERN_NOTICE "AFFS: Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n", + len > 31 ? 31 : len, AFFS_ROOT_TAIL(sb, root_bh)->disk_name + 1, - (char *)&chksum,((char *)&chksum)[3] + '0',blocksize); + sig, sig[3] + '0', blocksize); } sb->s_flags |= MS_NODEV | MS_NOSUID; @@ -508,8 +510,9 @@ affs_remount(struct super_block *sb, int *flags, char *data) } static int -affs_statfs(struct super_block *sb, struct kstatfs *buf) +affs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; int free; pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size, @@ -524,10 +527,11 @@ affs_statfs(struct super_block *sb, struct kstatfs *buf) return 0; } -static struct super_block *affs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int affs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, + mnt); } static struct file_system_type affs_fs_type = { diff --git a/fs/afs/dir.c b/fs/afs/dir.c index a6dff6a4f204..2fc99877cb0d 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -185,9 +185,7 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index) _enter("{%lu},%lu", dir->i_ino, index); - page = read_cache_page(dir->i_mapping,index, - (filler_t *) dir->i_mapping->a_ops->readpage, - NULL); + page = read_mapping_page(dir->i_mapping, index, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); kmap(page); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 4e6eeb59b83c..99785a79d043 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -63,7 +63,6 @@ unsigned long afs_mntpt_expiry_timeout = 20; int afs_mntpt_check_symlink(struct afs_vnode *vnode) { struct page *page; - filler_t *filler; size_t size; char *buf; int ret; @@ -71,10 +70,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode) _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); /* read the contents of the symlink into the pagecache */ - filler = (filler_t *) AFS_VNODE_TO_I(vnode)->i_mapping->a_ops->readpage; - - page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, - filler, NULL); + page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; @@ -160,7 +156,6 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) struct page *page = NULL; size_t size; char *buf, *devname = NULL, *options = NULL; - filler_t *filler; int ret; kenter("{%s}", mntpt->d_name.name); @@ -182,9 +177,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) goto error; /* read the contents of the AFS special symlink */ - filler = (filler_t *)mntpt->d_inode->i_mapping->a_ops->readpage; - - page = read_cache_page(mntpt->d_inode->i_mapping, 0, filler, NULL); + page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); goto error; @@ -210,7 +203,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) /* try and do the mount */ kdebug("--- attempting mount %s -o %s ---", devname, options); - mnt = do_kern_mount("afs", 0, devname, options); + mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); kdebug("--- mount result %p ---", mnt); free_page((unsigned long) devname); diff --git a/fs/afs/super.c b/fs/afs/super.c index 53c56e7231ab..67d1f5c819ec 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -38,9 +38,9 @@ struct afs_mount_params { static void afs_i_init_once(void *foo, kmem_cache_t *cachep, unsigned long flags); -static struct super_block *afs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data); +static int afs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt); static struct inode *afs_alloc_inode(struct super_block *sb); @@ -48,7 +48,7 @@ static void afs_put_super(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); -static struct file_system_type afs_fs_type = { +struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .get_sb = afs_get_sb, @@ -294,10 +294,11 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent) * get an AFS superblock * - TODO: don't use get_sb_nodev(), but rather call sget() directly */ -static struct super_block *afs_get_sb(struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *options) +static int afs_get_sb(struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *options, + struct vfsmount *mnt) { struct afs_mount_params params; struct super_block *sb; @@ -311,7 +312,7 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type, ret = afscm_start(); if (ret < 0) { _leave(" = %d", ret); - return ERR_PTR(ret); + return ret; } /* parse the options */ @@ -348,18 +349,19 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type, goto error; } sb->s_flags |= MS_ACTIVE; + simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.default_cell); - _leave(" = %p", sb); - return sb; + _leave(" = 0 [%p]", 0, sb); + return 0; error: afs_put_volume(params.volume); afs_put_cell(params.default_cell); afscm_stop(); _leave(" = %d", ret); - return ERR_PTR(ret); + return ret; } /* end afs_get_sb() */ /*****************************************************************************/ diff --git a/fs/afs/super.h b/fs/afs/super.h index ac11362f4e95..32de8cc6fae8 100644 --- a/fs/afs/super.h +++ b/fs/afs/super.h @@ -38,6 +38,8 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) return sb->s_fs_info; } +extern struct file_system_type afs_fs_type; + #endif /* __KERNEL__ */ #endif /* _LINUX_AFS_SUPER_H */ @@ -777,11 +777,11 @@ out: static int __aio_run_iocbs(struct kioctx *ctx) { struct kiocb *iocb; - LIST_HEAD(run_list); + struct list_head run_list; assert_spin_locked(&ctx->ctx_lock); - list_splice_init(&ctx->run_list, &run_list); + list_replace_init(&ctx->run_list, &run_list); while (!list_empty(&run_list)) { iocb = list_entry(run_list.next, struct kiocb, ki_run_list); diff --git a/fs/autofs/init.c b/fs/autofs/init.c index b977ece69f0c..aca123752406 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -14,10 +14,10 @@ #include <linux/init.h> #include "autofs_i.h" -static struct super_block *autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int autofs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, autofs_fill_super); + return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt); } static struct file_system_type autofs_fs_type = { diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b8ce02607d66..4456d1daa40f 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -174,6 +174,12 @@ static int autofs4_tree_busy(struct vfsmount *mnt, struct autofs_info *ino = autofs4_dentry_ino(p); unsigned int ino_count = atomic_read(&ino->count); + /* + * Clean stale dentries below that have not been + * invalidated after a mount fail during lookup + */ + d_invalidate(p); + /* allow for dget above and top is already dgot */ if (p == top) ino_count += 2; diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index acecec8578ce..5d9193332bef 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -14,10 +14,10 @@ #include <linux/init.h> #include "autofs_i.h" -static struct super_block *autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int autofs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, autofs4_fill_super); + return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); } static struct file_system_type autofs_fs_type = { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 68ebd10f345d..08201fab26cd 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -49,7 +49,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static void befs_put_super(struct super_block *); static int befs_remount(struct super_block *, int *, char *); -static int befs_statfs(struct super_block *, struct kstatfs *); +static int befs_statfs(struct dentry *, struct kstatfs *); static int parse_options(char *, befs_mount_options *); static const struct super_operations befs_sops = { @@ -880,8 +880,9 @@ befs_remount(struct super_block *sb, int *flags, char *data) } static int -befs_statfs(struct super_block *sb, struct kstatfs *buf) +befs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; befs_debug(sb, "---> befs_statfs()"); @@ -899,11 +900,12 @@ befs_statfs(struct super_block *sb, struct kstatfs *buf) return 0; } -static struct super_block * +static int befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, + mnt); } static struct file_system_type befs_fs_type = { diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 55a7a78332f8..cf74f3d4d966 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -203,8 +203,9 @@ static void bfs_put_super(struct super_block *s) s->s_fs_info = NULL; } -static int bfs_statfs(struct super_block *s, struct kstatfs *buf) +static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *s = dentry->d_sb; struct bfs_sb_info *info = BFS_SB(s); u64 id = huge_encode_dev(s->s_bdev->bd_dev); buf->f_type = BFS_MAGIC; @@ -410,10 +411,10 @@ out: return -EINVAL; } -static struct super_block *bfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); } static struct file_system_type bfs_fs_type = { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 537893a16014..d0434406eaeb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -38,15 +38,13 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/random.h> - +#include <linux/elf.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> -#include <linux/elf.h> - -static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); -static int load_elf_library(struct file*); +static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); +static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); @@ -59,15 +57,15 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); * don't even try. */ #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) -static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file); +static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file); #else #define elf_core_dump NULL #endif #if ELF_EXEC_PAGESIZE > PAGE_SIZE -# define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE +#define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE #else -# define ELF_MIN_ALIGN PAGE_SIZE +#define ELF_MIN_ALIGN PAGE_SIZE #endif #ifndef ELF_CORE_EFLAGS @@ -86,7 +84,7 @@ static struct linux_binfmt elf_format = { .min_coredump = ELF_EXEC_PAGESIZE }; -#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) +#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) static int set_brk(unsigned long start, unsigned long end) { @@ -104,13 +102,11 @@ static int set_brk(unsigned long start, unsigned long end) return 0; } - /* We need to explicitly zero any fractional pages after the data section (i.e. bss). This would contain the junk from the file that should not - be in memory */ - - + be in memory + */ static int padzero(unsigned long elf_bss) { unsigned long nbyte; @@ -129,7 +125,9 @@ static int padzero(unsigned long elf_bss) #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items)) #define STACK_ROUND(sp, items) \ ((15 + (unsigned long) ((sp) + (items))) &~ 15UL) -#define STACK_ALLOC(sp, len) ({ elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; old_sp; }) +#define STACK_ALLOC(sp, len) ({ \ + elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \ + old_sp; }) #else #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items)) #define STACK_ROUND(sp, items) \ @@ -138,7 +136,7 @@ static int padzero(unsigned long elf_bss) #endif static int -create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, +create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, int interp_aout, unsigned long load_addr, unsigned long interp_load_addr) { @@ -161,7 +159,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, * for userspace to get any other way, in others (i386) it is * merely difficult. */ - u_platform = NULL; if (k_platform) { size_t len = strlen(k_platform) + 1; @@ -171,7 +168,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, * evictions by the processes running on the same package. One * thing we can do is to shuffle the initial stack for them. */ - + p = arch_align_stack(p); u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); @@ -180,9 +177,12 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, } /* Create the ELF interpreter info */ - elf_info = (elf_addr_t *) current->mm->saved_auxv; + elf_info = (elf_addr_t *)current->mm->saved_auxv; #define NEW_AUX_ENT(id, val) \ - do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0) + do { \ + elf_info[ei_index++] = id; \ + elf_info[ei_index++] = val; \ + } while (0) #ifdef ARCH_DLINFO /* @@ -195,21 +195,22 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); - NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr)); + NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid); - NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid); - NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm)); + NEW_AUX_ENT(AT_UID, tsk->uid); + NEW_AUX_ENT(AT_EUID, tsk->euid); + NEW_AUX_ENT(AT_GID, tsk->gid); + NEW_AUX_ENT(AT_EGID, tsk->egid); + NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); if (k_platform) { - NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); + NEW_AUX_ENT(AT_PLATFORM, + (elf_addr_t)(unsigned long)u_platform); } if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { - NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data); + NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); } #undef NEW_AUX_ENT /* AT_NULL is zero; clear the rest too */ @@ -232,7 +233,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, /* Point sp at the lowest address on the stack */ #ifdef CONFIG_STACK_GROWSUP sp = (elf_addr_t __user *)bprm->p - items - ei_index; - bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */ + bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */ #else sp = (elf_addr_t __user *)bprm->p; #endif @@ -285,7 +286,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, #ifndef elf_map static unsigned long elf_map(struct file *filep, unsigned long addr, - struct elf_phdr *eppnt, int prot, int type) + struct elf_phdr *eppnt, int prot, int type) { unsigned long map_addr; unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); @@ -310,9 +311,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, is only provided so that we can read a.out libraries that have an ELF header */ -static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, - struct file * interpreter, - unsigned long *interp_load_addr) +static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, + struct file *interpreter, unsigned long *interp_load_addr) { struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; @@ -342,15 +342,15 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, goto out; /* Now read in all of the header information */ - size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum; if (size > ELF_MIN_ALIGN) goto out; - elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); + elf_phdata = kmalloc(size, GFP_KERNEL); if (!elf_phdata) goto out; - retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size); + retval = kernel_read(interpreter, interp_elf_ex->e_phoff, + (char *)elf_phdata,size); error = -EIO; if (retval != size) { if (retval < 0) @@ -359,58 +359,65 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, } eppnt = elf_phdata; - for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { - if (eppnt->p_type == PT_LOAD) { - int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = 0; - unsigned long vaddr = 0; - unsigned long k, map_addr; - - if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - vaddr = eppnt->p_vaddr; - if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) - elf_type |= MAP_FIXED; - - map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type); - error = map_addr; - if (BAD_ADDR(map_addr)) - goto out_close; - - if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) { - load_addr = map_addr - ELF_PAGESTART(vaddr); - load_addr_set = 1; - } - - /* - * Check to see if the section's size will overflow the - * allowed task size. Note that p_filesz must always be - * <= p_memsize so it is only necessary to check p_memsz. - */ - k = load_addr + eppnt->p_vaddr; - if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz || - eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { - error = -ENOMEM; - goto out_close; - } - - /* - * Find the end of the file mapping for this phdr, and keep - * track of the largest address we see for this. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; - - /* - * Do the same thing for the memory mapping - between - * elf_bss and last_bss is the bss section. - */ - k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; - if (k > last_bss) - last_bss = k; - } + for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { + if (eppnt->p_type == PT_LOAD) { + int elf_type = MAP_PRIVATE | MAP_DENYWRITE; + int elf_prot = 0; + unsigned long vaddr = 0; + unsigned long k, map_addr; + + if (eppnt->p_flags & PF_R) + elf_prot = PROT_READ; + if (eppnt->p_flags & PF_W) + elf_prot |= PROT_WRITE; + if (eppnt->p_flags & PF_X) + elf_prot |= PROT_EXEC; + vaddr = eppnt->p_vaddr; + if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) + elf_type |= MAP_FIXED; + + map_addr = elf_map(interpreter, load_addr + vaddr, + eppnt, elf_prot, elf_type); + error = map_addr; + if (BAD_ADDR(map_addr)) + goto out_close; + + if (!load_addr_set && + interp_elf_ex->e_type == ET_DYN) { + load_addr = map_addr - ELF_PAGESTART(vaddr); + load_addr_set = 1; + } + + /* + * Check to see if the section's size will overflow the + * allowed task size. Note that p_filesz must always be + * <= p_memsize so it's only necessary to check p_memsz. + */ + k = load_addr + eppnt->p_vaddr; + if (k > TASK_SIZE || + eppnt->p_filesz > eppnt->p_memsz || + eppnt->p_memsz > TASK_SIZE || + TASK_SIZE - eppnt->p_memsz < k) { + error = -ENOMEM; + goto out_close; + } + + /* + * Find the end of the file mapping for this phdr, and + * keep track of the largest address we see for this. + */ + k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; + if (k > elf_bss) + elf_bss = k; + + /* + * Do the same thing for the memory mapping - between + * elf_bss and last_bss is the bss section. + */ + k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; + if (k > last_bss) + last_bss = k; + } } /* @@ -424,7 +431,8 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, goto out_close; } - elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */ + /* What we have mapped so far */ + elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* Map the last of the bss segment */ if (last_bss > elf_bss) { @@ -436,7 +444,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, } *interp_load_addr = load_addr; - error = ((unsigned long) interp_elf_ex->e_entry) + load_addr; + error = ((unsigned long)interp_elf_ex->e_entry) + load_addr; out_close: kfree(elf_phdata); @@ -444,8 +452,8 @@ out: return error; } -static unsigned long load_aout_interp(struct exec * interp_ex, - struct file * interpreter) +static unsigned long load_aout_interp(struct exec *interp_ex, + struct file *interpreter) { unsigned long text_data, elf_entry = ~0UL; char __user * addr; @@ -464,7 +472,7 @@ static unsigned long load_aout_interp(struct exec * interp_ex, case ZMAGIC: case QMAGIC: offset = N_TXTOFF(*interp_ex); - addr = (char __user *) N_TXTADDR(*interp_ex); + addr = (char __user *)N_TXTADDR(*interp_ex); break; default: goto out; @@ -480,7 +488,6 @@ static unsigned long load_aout_interp(struct exec * interp_ex, flush_icache_range((unsigned long)addr, (unsigned long)addr + text_data); - down_write(¤t->mm->mmap_sem); do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), interp_ex->a_bss); @@ -519,7 +526,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) #endif } -static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) { struct file *interpreter = NULL; /* to shut gcc up */ unsigned long load_addr = 0, load_bias = 0; @@ -528,7 +535,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) unsigned int interpreter_type = INTERPRETER_NONE; unsigned char ibcs2_interpreter = 0; unsigned long error; - struct elf_phdr * elf_ppnt, *elf_phdata; + struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; int elf_exec_fileno; int retval, i; @@ -553,7 +560,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) } /* Get the exec-header */ - loc->elf_ex = *((struct elfhdr *) bprm->buf); + loc->elf_ex = *((struct elfhdr *)bprm->buf); retval = -ENOEXEC; /* First of all, some simple consistency checks */ @@ -568,7 +575,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out; /* Now read in all of the header information */ - if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) goto out; if (loc->elf_ex.e_phnum < 1 || @@ -576,18 +582,19 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out; size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); retval = -ENOMEM; - elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); + elf_phdata = kmalloc(size, GFP_KERNEL); if (!elf_phdata) goto out; - retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size); + retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, + (char *)elf_phdata, size); if (retval != size) { if (retval >= 0) retval = -EIO; goto out_free_ph; } - files = current->files; /* Refcounted so ok */ + files = current->files; /* Refcounted so ok */ retval = unshare_files(); if (retval < 0) goto out_free_ph; @@ -598,7 +605,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* exec will make our files private anyway, but for the a.out loader stuff we need to do it earlier */ - retval = get_unused_fd(); if (retval < 0) goto out_free_fh; @@ -620,7 +626,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) * shared libraries - for now assume that this * is an a.out format binary */ - retval = -ENOEXEC; if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2) @@ -628,13 +633,13 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) retval = -ENOMEM; elf_interpreter = kmalloc(elf_ppnt->p_filesz, - GFP_KERNEL); + GFP_KERNEL); if (!elf_interpreter) goto out_free_file; retval = kernel_read(bprm->file, elf_ppnt->p_offset, - elf_interpreter, - elf_ppnt->p_filesz); + elf_interpreter, + elf_ppnt->p_filesz); if (retval != elf_ppnt->p_filesz) { if (retval >= 0) retval = -EIO; @@ -678,7 +683,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) goto out_free_interp; - retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); + retval = kernel_read(interpreter, 0, bprm->buf, + BINPRM_BUF_SIZE); if (retval != BINPRM_BUF_SIZE) { if (retval >= 0) retval = -EIO; @@ -686,8 +692,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) } /* Get the exec headers */ - loc->interp_ex = *((struct exec *) bprm->buf); - loc->interp_elf_ex = *((struct elfhdr *) bprm->buf); + loc->interp_ex = *((struct exec *)bprm->buf); + loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } elf_ppnt++; @@ -739,7 +745,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, we are done with that, now set up the arg stuff, and then start this sucker up */ - if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) { char *passed_p = passed_fileno; sprintf(passed_fileno, "%d", elf_exec_fileno); @@ -759,7 +764,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* Discard our unneeded old files struct */ if (files) { - steal_locks(files); put_files_struct(files); files = NULL; } @@ -778,7 +782,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; - if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; arch_pick_mmap_layout(current->mm); @@ -799,8 +803,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) the correct location in memory. At this point, we assume that the image should be loaded at fixed address, not at a variable address. */ - - for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { + for(i = 0, elf_ppnt = elf_phdata; + i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; unsigned long k, vaddr; @@ -828,30 +832,35 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) load_bias, nbyte)) { /* * This bss-zeroing can fail if the ELF - * file specifies odd protections. So + * file specifies odd protections. So * we don't check the return value */ } } } - if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ; - if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; + if (elf_ppnt->p_flags & PF_R) + elf_prot |= PROT_READ; + if (elf_ppnt->p_flags & PF_W) + elf_prot |= PROT_WRITE; + if (elf_ppnt->p_flags & PF_X) + elf_prot |= PROT_EXEC; - elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; + elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; vaddr = elf_ppnt->p_vaddr; if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { elf_flags |= MAP_FIXED; } else if (loc->elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the default mmap - base, as well as whatever program they might try to exec. This - is because the brk will follow the loader, and is not movable. */ + /* Try and get dynamic programs out of the way of the + * default mmap base, as well as whatever program they + * might try to exec. This is because the brk will + * follow the loader, and is not movable. */ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); } - error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags); + error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, + elf_prot, elf_flags); if (BAD_ADDR(error)) { send_sig(SIGKILL, current, 0); goto out_free_dentry; @@ -868,8 +877,10 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) } } k = elf_ppnt->p_vaddr; - if (k < start_code) start_code = k; - if (start_data < k) start_data = k; + if (k < start_code) + start_code = k; + if (start_data < k) + start_data = k; /* * Check to see if the section's size will overflow the @@ -879,7 +890,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { - /* set_brk can never work. Avoid overflows. */ + /* set_brk can never work. Avoid overflows. */ send_sig(SIGKILL, current, 0); goto out_free_dentry; } @@ -967,8 +978,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; - create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT), - load_addr, interp_load_addr); + create_elf_tables(bprm, &loc->elf_ex, + (interpreter_type == INTERPRETER_AOUT), + load_addr, interp_load_addr); /* N.B. passed_fileno might not be initialized? */ if (interpreter_type == INTERPRETER_AOUT) current->mm->arg_start += strlen(passed_fileno) + 1; @@ -982,7 +994,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* Why this, you ask??? Well SVr4 maps page 0 as read-only, and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we - emulate the SVr4 behavior. Sigh. */ + emulate the SVr4 behavior. Sigh. */ down_write(¤t->mm->mmap_sem); error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); @@ -1037,7 +1049,6 @@ out_free_ph: /* This is really simpleminded and specialized - we are loading an a.out library that is given an ELF header. */ - static int load_elf_library(struct file *file) { struct elf_phdr *elf_phdata; @@ -1047,7 +1058,7 @@ static int load_elf_library(struct file *file) struct elfhdr elf_ex; error = -ENOEXEC; - retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex)); + retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex)); if (retval != sizeof(elf_ex)) goto out; @@ -1056,7 +1067,7 @@ static int load_elf_library(struct file *file) /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) + !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1104,7 +1115,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1); + len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + + ELF_MIN_ALIGN - 1); bss = eppnt->p_memsz + eppnt->p_vaddr; if (bss > len) { down_write(¤t->mm->mmap_sem); @@ -1163,7 +1175,7 @@ static int maydump(struct vm_area_struct *vma) if (vma->vm_flags & (VM_IO | VM_RESERVED)) return 0; - /* Dump shared memory only if mapped from an anonymous file. */ + /* Dump shared memory only if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) return vma->vm_file->f_dentry->d_inode->i_nlink == 0; @@ -1174,7 +1186,7 @@ static int maydump(struct vm_area_struct *vma) return 1; } -#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) /* An ELF note in memory */ struct memelfnote @@ -1277,11 +1289,11 @@ static void fill_note(struct memelfnote *note, const char *name, int type, } /* - * fill up all the fields in prstatus from the given task struct, except registers - * which need to be filled up separately. + * fill up all the fields in prstatus from the given task struct, except + * registers which need to be filled up separately. */ static void fill_prstatus(struct elf_prstatus *prstatus, - struct task_struct *p, long signr) + struct task_struct *p, long signr) { prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; @@ -1366,8 +1378,8 @@ struct elf_thread_status /* * In order to add the specific thread information for the elf file format, - * we need to keep a linked list of every threads pr_status and then - * create a single section for them in the final core file. + * we need to keep a linked list of every threads pr_status and then create + * a single section for them in the final core file. */ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) { @@ -1378,19 +1390,23 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) fill_prstatus(&t->prstatus, p, signr); elf_core_copy_task_regs(p, &t->prstatus.pr_reg); - fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus)); + fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), + &(t->prstatus)); t->num_notes++; sz += notesize(&t->notes[0]); - if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) { - fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu)); + if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, + &t->fpu))) { + fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), + &(t->fpu)); t->num_notes++; sz += notesize(&t->notes[1]); } #ifdef ELF_CORE_COPY_XFPREGS if (elf_core_copy_task_xfpregs(p, &t->xfpu)) { - fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu); + fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), + &t->xfpu); t->num_notes++; sz += notesize(&t->notes[2]); } @@ -1405,7 +1421,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) * and then they are actually written out. If we run out of core limit * we just truncate. */ -static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) +static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) { #define NUM_NOTES 6 int has_dumped = 0; @@ -1434,12 +1450,12 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) /* * We no longer stop all VM operations. * - * This is because those proceses that could possibly change map_count or - * the mmap / vma pages are now blocked in do_exit on current finishing - * this core dump. + * This is because those proceses that could possibly change map_count + * or the mmap / vma pages are now blocked in do_exit on current + * finishing this core dump. * * Only ptrace can touch these memory addresses, but it doesn't change - * the map_count or the pages allocated. So no possibility of crashing + * the map_count or the pages allocated. So no possibility of crashing * exists while dumping the mm->vm_next areas to the core file. */ @@ -1501,7 +1517,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) #endif /* Set up header */ - fill_elf_header(elf, segs+1); /* including notes section */ + fill_elf_header(elf, segs + 1); /* including notes section */ has_dumped = 1; current->flags |= PF_DUMPCORE; @@ -1511,24 +1527,24 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) * with info from their /proc. */ - fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); - + fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); fill_psinfo(psinfo, current->group_leader, current->mm); - fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); numnote = 2; - auxv = (elf_addr_t *) current->mm->saved_auxv; + auxv = (elf_addr_t *)current->mm->saved_auxv; i = 0; do i += 2; while (auxv[i - 2] != AT_NULL); fill_note(¬es[numnote++], "CORE", NT_AUXV, - i * sizeof (elf_addr_t), auxv); + i * sizeof(elf_addr_t), auxv); /* Try to dump the FPU. */ - if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu))) + if ((prstatus->pr_fpvalid = + elf_core_copy_task_fpregs(current, regs, fpu))) fill_note(notes + numnote++, "CORE", NT_PRFPREG, sizeof(*fpu), fpu); #ifdef ELF_CORE_COPY_XFPREGS @@ -1577,8 +1593,10 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; - if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W; - if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X; + if (vma->vm_flags & VM_WRITE) + phdr.p_flags |= PF_W; + if (vma->vm_flags & VM_EXEC) + phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; DUMP_WRITE(&phdr, sizeof(phdr)); @@ -1595,7 +1613,9 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) /* write out the thread status notes section */ list_for_each(t, &thread_list) { - struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list); + struct elf_thread_status *tmp = + list_entry(t, struct elf_thread_status, list); + for (i = 0; i < tmp->num_notes; i++) if (!writenote(&tmp->notes[i], file)) goto end_coredump; @@ -1612,18 +1632,19 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - struct page* page; + struct page *page; struct vm_area_struct *vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, &page, &vma) <= 0) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); + DUMP_SEEK(file->f_pos + PAGE_SIZE); } else { if (page == ZERO_PAGE(addr)) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); + DUMP_SEEK(file->f_pos + PAGE_SIZE); } else { void *kaddr; - flush_cache_page(vma, addr, page_to_pfn(page)); + flush_cache_page(vma, addr, + page_to_pfn(page)); kaddr = kmap(page); if ((size += PAGE_SIZE) > limit || !dump_write(file, kaddr, @@ -1645,7 +1666,8 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) if ((off_t)file->f_pos != offset) { /* Sanity check */ - printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", + printk(KERN_WARNING + "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", (off_t)file->f_pos, offset); } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index a2e48c999c24..eba4e23b9ca0 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -435,9 +435,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, struct elf_fdpic_params *interp_params) { unsigned long sp, csp, nitems; - elf_caddr_t *argv, *envp; + elf_caddr_t __user *argv, *envp; size_t platform_len = 0, len; - char *k_platform, *u_platform, *p; + char *k_platform; + char __user *u_platform, *p; long hwcap; int loop; @@ -462,12 +463,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, if (k_platform) { platform_len = strlen(k_platform) + 1; sp -= platform_len; + u_platform = (char __user *) sp; if (__copy_to_user(u_platform, k_platform, platform_len) != 0) return -EFAULT; } - u_platform = (char *) sp; - #if defined(__i386__) && defined(CONFIG_SMP) /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions * by the processes running on the same package. One thing we can do @@ -490,7 +490,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, sp = (sp - len) & ~7UL; exec_params->map_addr = sp; - if (copy_to_user((void *) sp, exec_params->loadmap, len) != 0) + if (copy_to_user((void __user *) sp, exec_params->loadmap, len) != 0) return -EFAULT; current->mm->context.exec_fdpic_loadmap = (unsigned long) sp; @@ -501,7 +501,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, sp = (sp - len) & ~7UL; interp_params->map_addr = sp; - if (copy_to_user((void *) sp, interp_params->loadmap, len) != 0) + if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0) return -EFAULT; current->mm->context.interp_fdpic_loadmap = (unsigned long) sp; @@ -527,7 +527,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, /* put the ELF interpreter info on the stack */ #define NEW_AUX_ENT(nr, id, val) \ do { \ - struct { unsigned long _id, _val; } *ent = (void *) csp; \ + struct { unsigned long _id, _val; } __user *ent = (void __user *) csp; \ __put_user((id), &ent[nr]._id); \ __put_user((val), &ent[nr]._val); \ } while (0) @@ -564,13 +564,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, /* allocate room for argv[] and envv[] */ csp -= (bprm->envc + 1) * sizeof(elf_caddr_t); - envp = (elf_caddr_t *) csp; + envp = (elf_caddr_t __user *) csp; csp -= (bprm->argc + 1) * sizeof(elf_caddr_t); - argv = (elf_caddr_t *) csp; + argv = (elf_caddr_t __user *) csp; /* stack argc */ csp -= sizeof(unsigned long); - __put_user(bprm->argc, (unsigned long *) csp); + __put_user(bprm->argc, (unsigned long __user *) csp); BUG_ON(csp != sp); @@ -581,7 +581,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p); #endif - p = (char *) current->mm->arg_start; + p = (char __user *) current->mm->arg_start; for (loop = bprm->argc; loop > 0; loop--) { __put_user((elf_caddr_t) p, argv++); len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES); @@ -1025,7 +1025,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, /* clear the bit between beginning of mapping and beginning of PT_LOAD */ if (prot & PROT_WRITE && disp > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); - clear_user((void *) maddr, disp); + clear_user((void __user *) maddr, disp); maddr += disp; } @@ -1059,7 +1059,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (prot & PROT_WRITE && excess1 > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr + phdr->p_filesz, excess1); - clear_user((void *) maddr + phdr->p_filesz, excess1); + clear_user((void __user *) maddr + phdr->p_filesz, excess1); } #else diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index b1c902e319c1..c94d52eafd1b 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -510,7 +510,7 @@ static int load_flat_file(struct linux_binprm * bprm, } /* OK, This is the point of no return */ - set_personality(PER_LINUX); + set_personality(PER_LINUX_32BIT); } /* diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index d73d75591a39..34ebbc191e46 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -55,6 +55,7 @@ typedef struct { } Node; static DEFINE_RWLOCK(entries_lock); +static struct file_system_type bm_fs_type; static struct vfsmount *bm_mnt; static int entry_count; @@ -203,7 +204,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto _error; if (files) { - steal_locks(files); put_files_struct(files); files = NULL; } @@ -638,7 +638,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, if (!inode) goto out2; - err = simple_pin_fs("binfmt_misc", &bm_mnt, &entry_count); + err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count); if (err) { iput(inode); inode = NULL; @@ -740,10 +740,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent) return err; } -static struct super_block *bm_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bm_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, bm_fill_super); + return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); } static struct linux_binfmt misc_format = { diff --git a/fs/block_dev.c b/fs/block_dev.c index f5958f413bd1..028d9fb9c2d5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -300,10 +300,10 @@ static struct super_operations bdev_sops = { .clear_inode = bdev_clear_inode, }; -static struct super_block *bd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bd_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); + return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); } static struct file_system_type bd_type = { @@ -414,21 +414,31 @@ EXPORT_SYMBOL(bdput); static struct block_device *bd_acquire(struct inode *inode) { struct block_device *bdev; + spin_lock(&bdev_lock); bdev = inode->i_bdev; - if (bdev && igrab(bdev->bd_inode)) { + if (bdev) { + atomic_inc(&bdev->bd_inode->i_count); spin_unlock(&bdev_lock); return bdev; } spin_unlock(&bdev_lock); + bdev = bdget(inode->i_rdev); if (bdev) { spin_lock(&bdev_lock); - if (inode->i_bdev) - __bd_forget(inode); - inode->i_bdev = bdev; - inode->i_mapping = bdev->bd_inode->i_mapping; - list_add(&inode->i_devices, &bdev->bd_inodes); + if (!inode->i_bdev) { + /* + * We take an additional bd_inode->i_count for inode, + * and it's released in clear_inode() of inode. + * So, we can access it via ->i_mapping always + * without igrab(). + */ + atomic_inc(&bdev->bd_inode->i_count); + inode->i_bdev = bdev; + inode->i_mapping = bdev->bd_inode->i_mapping; + list_add(&inode->i_devices, &bdev->bd_inodes); + } spin_unlock(&bdev_lock); } return bdev; @@ -438,10 +448,18 @@ static struct block_device *bd_acquire(struct inode *inode) void bd_forget(struct inode *inode) { + struct block_device *bdev = NULL; + spin_lock(&bdev_lock); - if (inode->i_bdev) + if (inode->i_bdev) { + if (inode->i_sb != blockdev_superblock) + bdev = inode->i_bdev; __bd_forget(inode); + } spin_unlock(&bdev_lock); + + if (bdev) + iput(bdev->bd_inode); } int bd_claim(struct block_device *bdev, void *holder) diff --git a/fs/buffer.c b/fs/buffer.c index 23f1f3a68077..373bb6292bdc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -331,7 +331,6 @@ long do_fsync(struct file *file, int datasync) goto out; } - current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(mapping); /* @@ -346,7 +345,6 @@ long do_fsync(struct file *file, int datasync) err = filemap_fdatawait(mapping); if (!ret) ret = err; - current->flags &= ~PF_SYNCWRITE; out: return ret; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c262d8874ce9..8b4de6eaabd0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -166,8 +166,9 @@ cifs_put_super(struct super_block *sb) } static int -cifs_statfs(struct super_block *sb, struct kstatfs *buf) +cifs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; int xid; int rc = -EOPNOTSUPP; struct cifs_sb_info *cifs_sb; @@ -402,12 +403,14 @@ static struct quotactl_ops cifs_quotactl_ops = { #endif #ifdef CONFIG_CIFS_EXPERIMENTAL -static void cifs_umount_begin(struct super_block * sblock) +static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) { struct cifs_sb_info *cifs_sb; struct cifsTconInfo * tcon; - cifs_sb = CIFS_SB(sblock); + if (!(flags & MNT_FORCE)) + return; + cifs_sb = CIFS_SB(vfsmnt->mnt_sb); if(cifs_sb == NULL) return; @@ -460,9 +463,9 @@ struct super_operations cifs_super_ops = { .remount_fs = cifs_remount, }; -static struct super_block * +static int cifs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { int rc; struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); @@ -470,7 +473,7 @@ cifs_get_sb(struct file_system_type *fs_type, cFYI(1, ("Devname: %s flags: %d ", dev_name, flags)); if (IS_ERR(sb)) - return sb; + return PTR_ERR(sb); sb->s_flags = flags; @@ -478,10 +481,10 @@ cifs_get_sb(struct file_system_type *fs_type, if (rc) { up_write(&sb->s_umount); deactivate_super(sb); - return ERR_PTR(rc); + return rc; } sb->s_flags |= MS_ACTIVE; - return sb; + return simple_set_mnt(mnt, sb); } static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c98755dca868..d56c0577c710 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -74,7 +74,7 @@ extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, size_t write_size, loff_t * poffset); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, struct dentry *, int); -extern int cifs_flush(struct file *); +extern int cifs_flush(struct file *, fl_owner_t id); extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e2b4ce1dad66..b4a18c1cab0a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1079,9 +1079,9 @@ static int cifs_writepages(struct address_space *mapping, unsigned int bytes_written; struct cifs_sb_info *cifs_sb; int done = 0; - pgoff_t end = -1; + pgoff_t end; pgoff_t index; - int is_range = 0; + int range_whole = 0; struct kvec iov[32]; int len; int n_iov = 0; @@ -1122,16 +1122,14 @@ static int cifs_writepages(struct address_space *mapping, xid = GetXid(); pagevec_init(&pvec, 0); - if (wbc->sync_mode == WB_SYNC_NONE) + if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ - else { - index = 0; - scanned = 1; - } - if (wbc->start || wbc->end) { - index = wbc->start >> PAGE_CACHE_SHIFT; - end = wbc->end >> PAGE_CACHE_SHIFT; - is_range = 1; + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; scanned = 1; } retry: @@ -1167,7 +1165,7 @@ retry: break; } - if (unlikely(is_range) && (page->index > end)) { + if (!wbc->range_cyclic && page->index > end) { done = 1; unlock_page(page); break; @@ -1271,7 +1269,7 @@ retry: index = 0; goto retry; } - if (!is_range) + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; FreeXid(xid); @@ -1419,7 +1417,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) * As file closes, flush all cached write data for this inode checking * for write behind errors. */ -int cifs_flush(struct file *file) +int cifs_flush(struct file *file, fl_owner_t id) { struct inode * inode = file->f_dentry->d_inode; int rc = 0; diff --git a/fs/coda/file.c b/fs/coda/file.c index 7c2642431fa5..cc66c681bd11 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -164,7 +164,7 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) return 0; } -int coda_flush(struct file *coda_file) +int coda_flush(struct file *coda_file, fl_owner_t id) { unsigned short flags = coda_file->f_flags & ~O_EXCL; unsigned short coda_flags = coda_flags_to_cflags(flags); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index ada1a81df6bd..87f1dc8aa24b 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -36,7 +36,7 @@ /* VFS super_block ops */ static void coda_clear_inode(struct inode *); static void coda_put_super(struct super_block *); -static int coda_statfs(struct super_block *sb, struct kstatfs *buf); +static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); static kmem_cache_t * coda_inode_cachep; @@ -278,13 +278,13 @@ struct inode_operations coda_file_inode_operations = { .setattr = coda_setattr, }; -static int coda_statfs(struct super_block *sb, struct kstatfs *buf) +static int coda_statfs(struct dentry *dentry, struct kstatfs *buf) { int error; lock_kernel(); - error = venus_statfs(sb, buf); + error = venus_statfs(dentry, buf); unlock_kernel(); @@ -307,10 +307,10 @@ static int coda_statfs(struct super_block *sb, struct kstatfs *buf) /* init_coda: used by filesystems.c to register coda */ -static struct super_block *coda_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int coda_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, coda_fill_super); + return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); } struct file_system_type coda_fs_type = { diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 1bae99650a91..b040eba13a7d 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -611,7 +611,7 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid, return error; } -int venus_statfs(struct super_block *sb, struct kstatfs *sfs) +int venus_statfs(struct dentry *dentry, struct kstatfs *sfs) { union inputArgs *inp; union outputArgs *outp; @@ -620,7 +620,7 @@ int venus_statfs(struct super_block *sb, struct kstatfs *sfs) insize = max_t(unsigned int, INSIZE(statfs), OUTSIZE(statfs)); UPARG(CODA_STATFS); - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_sbp(dentry->d_sb), insize, &outsize, inp); if (!error) { sfs->f_blocks = outp->coda_statfs.stat.f_blocks; diff --git a/fs/compat.c b/fs/compat.c index b1f64786a613..7e7e5bc4f3cf 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -197,7 +197,7 @@ asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs error = user_path_walk(path, &nd); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); + error = vfs_statfs(nd.dentry, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); path_release(&nd); @@ -215,7 +215,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user file = fget(fd); if (!file) goto out; - error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); + error = vfs_statfs(file->f_dentry, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); fput(file); @@ -265,7 +265,7 @@ asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, s error = user_path_walk(path, &nd); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); + error = vfs_statfs(nd.dentry, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); path_release(&nd); @@ -286,7 +286,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c file = fget(fd); if (!file) goto out; - error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); + error = vfs_statfs(file->f_dentry, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); fput(file); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index d2c38875ab29..9eb9824dd332 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -205,38 +205,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); } -struct compat_dmx_event { - dmx_event_t event; - compat_time_t timeStamp; - union - { - dmx_scrambling_status_t scrambling; - } u; -}; - -static int do_dmx_get_event(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct dmx_event kevent; - mm_segment_t old_fs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sys_ioctl(fd, cmd, (unsigned long) &kevent); - set_fs(old_fs); - - if (!err) { - struct compat_dmx_event __user *up = compat_ptr(arg); - - err = put_user(kevent.event, &up->event); - err |= put_user(kevent.timeStamp, &up->timeStamp); - err |= put_user(kevent.u.scrambling, &up->u.scrambling); - if (err) - err = -EFAULT; - } - - return err; -} - struct compat_video_event { int32_t type; compat_time_t timestamp; @@ -2964,7 +2932,6 @@ HANDLE_IOCTL(NCP_IOC_SETPRIVATEDATA_32, do_ncp_setprivatedata) #endif /* dvb */ -HANDLE_IOCTL(DMX_GET_EVENT, do_dmx_get_event) HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event) HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture) HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette) diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index f920d30478e5..3e5fe843e1df 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -103,10 +103,10 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *configfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int configfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, configfs_fill_super); + return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); } static struct file_system_type configfs_fs_type = { @@ -118,7 +118,7 @@ static struct file_system_type configfs_fs_type = { int configfs_pin_fs(void) { - return simple_pin_fs("configfs", &configfs_mount, + return simple_pin_fs(&configfs_fs_type, &configfs_mount, &configfs_mnt_count); } diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9efcc3a164e8..c45d73860803 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -181,9 +181,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i struct page *page = NULL; if (blocknr + i < devsize) { - page = read_cache_page(mapping, blocknr + i, - (filler_t *)mapping->a_ops->readpage, - NULL); + page = read_mapping_page(mapping, blocknr + i, NULL); /* synchronous error? */ if (IS_ERR(page)) page = NULL; @@ -322,8 +320,10 @@ out: return -EINVAL; } -static int cramfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; + buf->f_type = CRAMFS_MAGIC; buf->f_bsize = PAGE_CACHE_SIZE; buf->f_blocks = CRAMFS_SB(sb)->blocks; @@ -528,10 +528,11 @@ static struct super_operations cramfs_ops = { .statfs = cramfs_statfs, }; -static struct super_block *cramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int cramfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, + mnt); } static struct file_system_type cramfs_fs_type = { diff --git a/fs/dcache.c b/fs/dcache.c index 940d188e5d14..b85fda360533 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -359,12 +359,13 @@ restart: } /* - * Throw away a dentry - free the inode, dput the parent. - * This requires that the LRU list has already been - * removed. + * Throw away a dentry - free the inode, dput the parent. This requires that + * the LRU list has already been removed. + * * Called with dcache_lock, drops it and then regains. + * Called with dentry->d_lock held, drops it. */ -static inline void prune_one_dentry(struct dentry * dentry) +static void prune_one_dentry(struct dentry * dentry) { struct dentry * parent; @@ -382,6 +383,8 @@ static inline void prune_one_dentry(struct dentry * dentry) /** * prune_dcache - shrink the dcache * @count: number of entries to try and free + * @sb: if given, ignore dentries for other superblocks + * which are being unmounted. * * Shrink the dcache. This is done when we need * more memory, or simply when we need to unmount @@ -392,16 +395,29 @@ static inline void prune_one_dentry(struct dentry * dentry) * all the dentries are in use. */ -static void prune_dcache(int count) +static void prune_dcache(int count, struct super_block *sb) { spin_lock(&dcache_lock); for (; count ; count--) { struct dentry *dentry; struct list_head *tmp; + struct rw_semaphore *s_umount; cond_resched_lock(&dcache_lock); tmp = dentry_unused.prev; + if (sb) { + /* Try to find a dentry for this sb, but don't try + * too hard, if they aren't near the tail they will + * be moved down again soon + */ + int skip = count; + while (skip && tmp != &dentry_unused && + list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { + skip--; + tmp = tmp->prev; + } + } if (tmp == &dentry_unused) break; list_del_init(tmp); @@ -427,7 +443,45 @@ static void prune_dcache(int count) spin_unlock(&dentry->d_lock); continue; } - prune_one_dentry(dentry); + /* + * If the dentry is not DCACHED_REFERENCED, it is time + * to remove it from the dcache, provided the super block is + * NULL (which means we are trying to reclaim memory) + * or this dentry belongs to the same super block that + * we want to shrink. + */ + /* + * If this dentry is for "my" filesystem, then I can prune it + * without taking the s_umount lock (I already hold it). + */ + if (sb && dentry->d_sb == sb) { + prune_one_dentry(dentry); + continue; + } + /* + * ...otherwise we need to be sure this filesystem isn't being + * unmounted, otherwise we could race with + * generic_shutdown_super(), and end up holding a reference to + * an inode while the filesystem is unmounted. + * So we try to get s_umount, and make sure s_root isn't NULL. + * (Take a local copy of s_umount to avoid a use-after-free of + * `dentry'). + */ + s_umount = &dentry->d_sb->s_umount; + if (down_read_trylock(s_umount)) { + if (dentry->d_sb->s_root != NULL) { + prune_one_dentry(dentry); + up_read(s_umount); + continue; + } + up_read(s_umount); + } + spin_unlock(&dentry->d_lock); + /* Cannot remove the first dentry, and it isn't appropriate + * to move it to the head of the list, so give up, and try + * later + */ + break; } spin_unlock(&dcache_lock); } @@ -630,46 +684,7 @@ void shrink_dcache_parent(struct dentry * parent) int found; while ((found = select_parent(parent)) != 0) - prune_dcache(found); -} - -/** - * shrink_dcache_anon - further prune the cache - * @head: head of d_hash list of dentries to prune - * - * Prune the dentries that are anonymous - * - * parsing d_hash list does not hlist_for_each_entry_rcu() as it - * done under dcache_lock. - * - */ -void shrink_dcache_anon(struct hlist_head *head) -{ - struct hlist_node *lp; - int found; - do { - found = 0; - spin_lock(&dcache_lock); - hlist_for_each(lp, head) { - struct dentry *this = hlist_entry(lp, struct dentry, d_hash); - if (!list_empty(&this->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&this->d_lru); - } - - /* - * move only zero ref count dentries to the end - * of the unused list for prune_dcache - */ - if (!atomic_read(&this->d_count)) { - list_add_tail(&this->d_lru, &dentry_unused); - dentry_stat.nr_unused++; - found++; - } - } - spin_unlock(&dcache_lock); - prune_dcache(found); - } while(found); + prune_dcache(found, parent->d_sb); } /* @@ -689,7 +704,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; - prune_dcache(nr); + prune_dcache(nr, NULL); } return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b55b4ea9a676..6fa1e04f8415 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -111,11 +111,11 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); } -static struct super_block *debug_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int debug_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, debug_fill_super); + return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); } static struct file_system_type debug_fs_type = { @@ -199,7 +199,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode, pr_debug("debugfs: creating file '%s'\n",name); - error = simple_pin_fs("debugfs", &debugfs_mount, &debugfs_mount_count); + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) goto exit; diff --git a/fs/devfs/base.c b/fs/devfs/base.c index 52f5059c4f31..51a97f132745 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -2549,11 +2549,11 @@ static int devfs_fill_super(struct super_block *sb, void *data, int silent) return -EINVAL; } /* End Function devfs_fill_super */ -static struct super_block *devfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int devfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, devfs_fill_super); + return get_sb_single(fs_type, flags, data, devfs_fill_super, mnt); } static struct file_system_type devfs_fs_type = { diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 14c5620b5cab..f7aef5bb584a 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -130,10 +130,10 @@ fail: return -ENOMEM; } -static struct super_block *devpts_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int devpts_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, devpts_fill_super); + return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); } static struct file_system_type devpts_fs_type = { diff --git a/fs/direct-io.c b/fs/direct-io.c index b05d1b218776..538fb0418fba 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -162,7 +162,7 @@ static int dio_refill_pages(struct dio *dio) NULL); /* vmas */ up_read(¤t->mm->mmap_sem); - if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) { + if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { struct page *page = ZERO_PAGE(dio->curr_user_address); /* * A memory fault, but the filesystem has some outstanding @@ -535,7 +535,7 @@ static int get_more_blocks(struct dio *dio) map_bh->b_state = 0; map_bh->b_size = fs_count << dio->inode->i_blkbits; - create = dio->rw == WRITE; + create = dio->rw & WRITE; if (dio->lock_type == DIO_LOCKING) { if (dio->block_in_file < (i_size_read(dio->inode) >> dio->blkbits)) @@ -867,7 +867,7 @@ do_holes: loff_t i_size_aligned; /* AKPM: eargh, -ENOTBLK is a hack */ - if (dio->rw == WRITE) { + if (dio->rw & WRITE) { page_cache_release(page); return -ENOTBLK; } @@ -1045,7 +1045,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, } } /* end iovec loop */ - if (ret == -ENOTBLK && rw == WRITE) { + if (ret == -ENOTBLK && (rw & WRITE)) { /* * The remaining part of the request will be * be handled by buffered I/O when we return @@ -1089,7 +1089,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, if (dio->is_async) { int should_wait = 0; - if (dio->result < dio->size && rw == WRITE) { + if (dio->result < dio->size && (rw & WRITE)) { dio->waiter = current; should_wait = 1; } @@ -1142,7 +1142,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ret = transferred; /* We could have also come here on an AIO file extend */ - if (!is_sync_kiocb(iocb) && rw == WRITE && + if (!is_sync_kiocb(iocb) && (rw & WRITE) && ret >= 0 && dio->result == dio->size) /* * For AIO writes where we have completed the @@ -1194,7 +1194,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int acquire_i_mutex = 0; if (rw & WRITE) - current->flags |= PF_SYNCWRITE; + rw = WRITE_SYNC; if (bdev) bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); @@ -1270,7 +1270,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * even for AIO, we need to wait for i/o to complete before * returning in this case. */ - dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) && + dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && (end > i_size_read(inode))); retval = direct_io_worker(rw, iocb, inode, iov, offset, @@ -1284,8 +1284,6 @@ out: mutex_unlock(&inode->i_mutex); else if (acquire_i_mutex) mutex_lock(&inode->i_mutex); - if (rw & WRITE) - current->flags &= ~PF_SYNCWRITE; return retval; } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/efs/super.c b/fs/efs/super.c index dff623e3ddbf..8ac2462ae5dd 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -15,13 +15,13 @@ #include <linux/buffer_head.h> #include <linux/vfs.h> -static int efs_statfs(struct super_block *s, struct kstatfs *buf); +static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); static int efs_fill_super(struct super_block *s, void *d, int silent); -static struct super_block *efs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int efs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); } static struct file_system_type efs_fs_type = { @@ -322,8 +322,8 @@ out_no_fs: return -EINVAL; } -static int efs_statfs(struct super_block *s, struct kstatfs *buf) { - struct efs_sb_info *sb = SUPER_INFO(s); +static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct efs_sb_info *sb = SUPER_INFO(dentry->d_sb); buf->f_type = EFS_SUPER_MAGIC; /* efs magic number */ buf->f_bsize = EFS_BLOCKSIZE; /* blocksize */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2695337d4d64..9c677bbd0b08 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1,6 +1,6 @@ /* * fs/eventpoll.c ( Efficent event polling implementation ) - * Copyright (C) 2001,...,2003 Davide Libenzi + * Copyright (C) 2001,...,2006 Davide Libenzi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -268,9 +268,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout); static int eventpollfs_delete_dentry(struct dentry *dentry); static struct inode *ep_eventpoll_inode(void); -static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data); +static int eventpollfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt); /* * This semaphore is used to serialize ep_free() and eventpoll_release_file(). @@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE); if (waitqueue_active(&ep->poll_wait)) pwake++; } @@ -1083,7 +1083,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); if (waitqueue_active(&ep->poll_wait)) pwake++; } @@ -1260,7 +1261,8 @@ is_linked: * wait list. */ if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); if (waitqueue_active(&ep->poll_wait)) pwake++; @@ -1444,7 +1446,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) * wait list. */ if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); if (waitqueue_active(&ep->poll_wait)) pwake++; } @@ -1516,7 +1519,7 @@ retry: * ep_poll_callback() when events will become available. */ init_waitqueue_entry(&wait, current); - add_wait_queue(&ep->wq, &wait); + __add_wait_queue(&ep->wq, &wait); for (;;) { /* @@ -1536,7 +1539,7 @@ retry: jtimeout = schedule_timeout(jtimeout); write_lock_irqsave(&ep->lock, flags); } - remove_wait_queue(&ep->wq, &wait); + __remove_wait_queue(&ep->wq, &wait); set_current_state(TASK_RUNNING); } @@ -1595,11 +1598,12 @@ eexit_1: } -static struct super_block * +static int eventpollfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC); + return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC, + mnt); } diff --git a/fs/exec.c b/fs/exec.c index d07858c0b7c4..0b88bf646143 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -866,7 +866,6 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ /* This is the point of no return */ - steal_locks(files); put_files_struct(files); current->sas_ss_sp = current->sas_ss_size = 0; diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index c5d02da73bc3..e0b2b43c1fdb 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT2_FS) += ext2.o -ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 2c00953d4b0b..433a213a8bd9 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -521,6 +521,26 @@ io_error: goto out_release; } +#ifdef EXT2FS_DEBUG + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} + +#endif /* EXT2FS_DEBUG */ + +/* Superblock must be locked */ unsigned long ext2_count_free_blocks (struct super_block * sb) { struct ext2_group_desc * desc; @@ -530,7 +550,6 @@ unsigned long ext2_count_free_blocks (struct super_block * sb) unsigned long bitmap_count, x; struct ext2_super_block *es; - lock_super (sb); es = EXT2_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; @@ -554,7 +573,6 @@ unsigned long ext2_count_free_blocks (struct super_block * sb) printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", (long)le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); - unlock_super (sb); return bitmap_count; #else for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c deleted file mode 100644 index e9983a0dd396..000000000000 --- a/fs/ext2/bitmap.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * linux/fs/ext2/bitmap.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - */ - -#ifdef EXT2FS_DEBUG - -#include <linux/buffer_head.h> - -#include "ext2.h" - -static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - -unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) -{ - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); -} - -#endif /* EXT2FS_DEBUG */ - diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index d672aa9f4061..92ea8265d7d5 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -159,8 +159,7 @@ fail: static struct page * ext2_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_cache_page(mapping, n, - (filler_t*)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); kmap(page); @@ -400,8 +399,7 @@ ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) de = ext2_find_entry (dir, dentry, &page); if (de) { res = le32_to_cpu(de->inode); - kunmap(page); - page_cache_release(page); + ext2_put_page(page); } return res; } diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c index c9c2e5ffa48e..7806b9e8155b 100644 --- a/fs/ext2/fsync.c +++ b/fs/ext2/fsync.c @@ -24,7 +24,7 @@ #include "ext2.h" #include <linux/smp_lock.h> -#include <linux/buffer_head.h> /* for fsync_inode_buffers() */ +#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ /* diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index e52765219e16..308c252568c6 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -638,6 +638,7 @@ fail: return ERR_PTR(err); } +/* Superblock must be locked */ unsigned long ext2_count_free_inodes (struct super_block * sb) { struct ext2_group_desc *desc; @@ -649,7 +650,6 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) unsigned long bitmap_count = 0; struct buffer_head *bitmap_bh = NULL; - lock_super (sb); es = EXT2_SB(sb)->s_es; for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { unsigned x; @@ -672,7 +672,6 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), desc_count, bitmap_count); - unlock_super(sb); return desc_count; #else for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7e30bae174ed..d4233b2e6436 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -39,7 +39,7 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es); static int ext2_remount (struct super_block * sb, int * flags, char * data); -static int ext2_statfs (struct super_block * sb, struct kstatfs * buf); +static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); void ext2_error (struct super_block * sb, const char * function, const char * fmt, ...) @@ -834,9 +834,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) printk ("EXT2-fs: not enough memory\n"); goto failed_mount; } - percpu_counter_init(&sbi->s_freeblocks_counter); - percpu_counter_init(&sbi->s_freeinodes_counter); - percpu_counter_init(&sbi->s_dirs_counter); bgl_lock_init(&sbi->s_blockgroup_lock); sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), GFP_KERNEL); @@ -857,12 +854,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } if (!ext2_check_descriptors (sb)) { printk ("EXT2-fs: group descriptors corrupted!\n"); - db_count = i; goto failed_mount2; } sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + + percpu_counter_init(&sbi->s_freeblocks_counter, + ext2_count_free_blocks(sb)); + percpu_counter_init(&sbi->s_freeinodes_counter, + ext2_count_free_inodes(sb)); + percpu_counter_init(&sbi->s_dirs_counter, + ext2_count_dirs(sb)); /* * set up enough so that it can read an inode */ @@ -874,24 +877,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) { iput(root); printk(KERN_ERR "EXT2-fs: get root inode failed\n"); - goto failed_mount2; + goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { dput(sb->s_root); sb->s_root = NULL; printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); - goto failed_mount2; + goto failed_mount3; } if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_warning(sb, __FUNCTION__, "mounting ext3 filesystem as ext2"); ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); - percpu_counter_mod(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); - percpu_counter_mod(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); return 0; cantfind_ext2: @@ -899,7 +896,10 @@ cantfind_ext2: printk("VFS: Can't find an ext2 filesystem on dev %s.\n", sb->s_id); goto failed_mount; - +failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); @@ -1038,12 +1038,14 @@ restore_opts: return err; } -static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) +static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) { + struct super_block *sb = dentry->d_sb; struct ext2_sb_info *sbi = EXT2_SB(sb); unsigned long overhead; int i; + lock_super(sb); if (test_opt (sb, MINIX_DF)) overhead = 0; else { @@ -1084,13 +1086,14 @@ static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); buf->f_ffree = ext2_count_free_inodes (sb); buf->f_namelen = EXT2_NAME_LEN; + unlock_super(sb); return 0; } -static struct super_block *ext2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ext2_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); } #ifdef CONFIG_QUOTA diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 77927d6938f6..96172e89ddc3 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -163,20 +163,19 @@ restart: #endif static int -goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, +goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, unsigned int group, struct super_block * sb) { - unsigned long group_first_block, group_last_block; + ext3_fsblk_t group_first_block, group_last_block; - group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; if ((rsv->_rsv_start > group_last_block) || (rsv->_rsv_end < group_first_block)) return 0; - if ((goal >= 0) && ((goal + group_first_block < rsv->_rsv_start) - || (goal + group_first_block > rsv->_rsv_end))) + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) return 0; return 1; } @@ -187,7 +186,7 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, * Returns NULL if there are no windows or if all windows start after the goal. */ static struct ext3_reserve_window_node * -search_reserve_window(struct rb_root *root, unsigned long goal) +search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) { struct rb_node *n = root->rb_node; struct ext3_reserve_window_node *rsv; @@ -223,7 +222,7 @@ void ext3_rsv_window_add(struct super_block *sb, { struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; struct rb_node *node = &rsv->rsv_node; - unsigned int start = rsv->rsv_start; + ext3_fsblk_t start = rsv->rsv_start; struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; @@ -310,20 +309,20 @@ void ext3_discard_reservation(struct inode *inode) /* Free given blocks, update quota and i_blocks field */ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, - unsigned long block, unsigned long count, - int *pdquot_freed_blocks) + ext3_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gd_bh; unsigned long block_group; - unsigned long bit; + ext3_grpblk_t bit; unsigned long i; unsigned long overflow; struct ext3_group_desc * desc; struct ext3_super_block * es; struct ext3_sb_info *sbi; int err = 0, ret; - unsigned group_freed; + ext3_grpblk_t group_freed; *pdquot_freed_blocks = 0; sbi = EXT3_SB(sb); @@ -333,7 +332,7 @@ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, block + count > le32_to_cpu(es->s_blocks_count)) { ext3_error (sb, "ext3_free_blocks", "Freeing blocks not in datazone - " - "block = %lu, count = %lu", block, count); + "block = "E3FSBLK", count = %lu", block, count); goto error_return; } @@ -369,7 +368,7 @@ do_more: sbi->s_itb_per_group)) ext3_error (sb, "ext3_free_blocks", "Freeing blocks in system zones - " - "Block = %lu, count = %lu", + "Block = "E3FSBLK", count = %lu", block, count); /* @@ -453,7 +452,8 @@ do_more: bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); ext3_error(sb, __FUNCTION__, - "bit already cleared for block %lu", block + i); + "bit already cleared for block "E3FSBLK, + block + i); jbd_lock_bh_state(bitmap_bh); BUFFER_TRACE(bitmap_bh, "bit already cleared"); } else { @@ -493,10 +493,10 @@ error_return: /* Free given blocks, update quota and i_blocks field */ void ext3_free_blocks(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count) + ext3_fsblk_t block, unsigned long count) { struct super_block * sb; - int dquot_freed_blocks; + unsigned long dquot_freed_blocks; sb = inode->i_sb; if (!sb) { @@ -525,7 +525,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, * data-writes at some point, and disable it for metadata allocations or * sync-data inodes. */ -static int ext3_test_allocatable(int nr, struct buffer_head *bh) +static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) { int ret; struct journal_head *jh = bh2jh(bh); @@ -542,11 +542,11 @@ static int ext3_test_allocatable(int nr, struct buffer_head *bh) return ret; } -static int -bitmap_search_next_usable_block(int start, struct buffer_head *bh, - int maxblocks) +static ext3_grpblk_t +bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, + ext3_grpblk_t maxblocks) { - int next; + ext3_grpblk_t next; struct journal_head *jh = bh2jh(bh); /* @@ -576,10 +576,11 @@ bitmap_search_next_usable_block(int start, struct buffer_head *bh, * the initial goal; then for a free byte somewhere in the bitmap; then * for any free bit in the bitmap. */ -static int -find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) +static ext3_grpblk_t +find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, + ext3_grpblk_t maxblocks) { - int here, next; + ext3_grpblk_t here, next; char *p, *r; if (start > 0) { @@ -591,7 +592,7 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the * next 64-bit boundary is simple.. */ - int end_goal = (start + 63) & ~63; + ext3_grpblk_t end_goal = (start + 63) & ~63; if (end_goal > maxblocks) end_goal = maxblocks; here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); @@ -628,7 +629,7 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) * zero (failure). */ static inline int -claim_block(spinlock_t *lock, int block, struct buffer_head *bh) +claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) { struct journal_head *jh = bh2jh(bh); int ret; @@ -651,19 +652,18 @@ claim_block(spinlock_t *lock, int block, struct buffer_head *bh) * new bitmap. In that case we must release write access to the old one via * ext3_journal_release_buffer(), else we'll run out of credits. */ -static int +static ext3_grpblk_t ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, int goal, + struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal, unsigned long *count, struct ext3_reserve_window *my_rsv) { - int group_first_block, start, end; + ext3_fsblk_t group_first_block; + ext3_grpblk_t start, end; unsigned long num = 0; /* we do allocation within the reservation window if we have a window */ if (my_rsv) { - group_first_block = - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); if (my_rsv->_rsv_start >= group_first_block) start = my_rsv->_rsv_start - group_first_block; else @@ -673,13 +673,13 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, if (end > EXT3_BLOCKS_PER_GROUP(sb)) /* reservation window crosses group boundary */ end = EXT3_BLOCKS_PER_GROUP(sb); - if ((start <= goal) && (goal < end)) - start = goal; + if ((start <= grp_goal) && (grp_goal < end)) + start = grp_goal; else - goal = -1; + grp_goal = -1; } else { - if (goal > 0) - start = goal; + if (grp_goal > 0) + start = grp_goal; else start = 0; end = EXT3_BLOCKS_PER_GROUP(sb); @@ -688,43 +688,43 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); repeat: - if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) { - goal = find_next_usable_block(start, bitmap_bh, end); - if (goal < 0) + if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) { + grp_goal = find_next_usable_block(start, bitmap_bh, end); + if (grp_goal < 0) goto fail_access; if (!my_rsv) { int i; - for (i = 0; i < 7 && goal > start && - ext3_test_allocatable(goal - 1, + for (i = 0; i < 7 && grp_goal > start && + ext3_test_allocatable(grp_goal - 1, bitmap_bh); - i++, goal--) + i++, grp_goal--) ; } } - start = goal; + start = grp_goal; - if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { + if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { /* * The block was allocated by another thread, or it was * allocated and then freed by another thread */ start++; - goal++; + grp_goal++; if (start >= end) goto fail_access; goto repeat; } num++; - goal++; - while (num < *count && goal < end - && ext3_test_allocatable(goal, bitmap_bh) - && claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { + grp_goal++; + while (num < *count && grp_goal < end + && ext3_test_allocatable(grp_goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { num++; - goal++; + grp_goal++; } *count = num; - return goal - num; + return grp_goal - num; fail_access: *count = num; return -1; @@ -766,12 +766,13 @@ fail_access: static int find_next_reservable_window( struct ext3_reserve_window_node *search_head, struct ext3_reserve_window_node *my_rsv, - struct super_block * sb, int start_block, - int last_block) + struct super_block * sb, + ext3_fsblk_t start_block, + ext3_fsblk_t last_block) { struct rb_node *next; struct ext3_reserve_window_node *rsv, *prev; - int cur; + ext3_fsblk_t cur; int size = my_rsv->rsv_goal_size; /* TODO: make the start of the reservation window byte-aligned */ @@ -873,10 +874,10 @@ static int find_next_reservable_window( * * @rsv: the reservation * - * @goal: The goal (group-relative). It is where the search for a + * @grp_goal: The goal (group-relative). It is where the search for a * free reservable space should start from. - * if we have a goal(goal >0 ), then start from there, - * no goal(goal = -1), we start from the first block + * if we have a grp_goal(grp_goal >0 ), then start from there, + * no grp_goal(grp_goal = -1), we start from the first block * of the group. * * @sb: the super block @@ -885,25 +886,24 @@ static int find_next_reservable_window( * */ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, - int goal, struct super_block *sb, + ext3_grpblk_t grp_goal, struct super_block *sb, unsigned int group, struct buffer_head *bitmap_bh) { struct ext3_reserve_window_node *search_head; - int group_first_block, group_end_block, start_block; - int first_free_block; + ext3_fsblk_t group_first_block, group_end_block, start_block; + ext3_grpblk_t first_free_block; struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; unsigned long size; int ret; spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; - group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; - if (goal < 0) + if (grp_goal < 0) start_block = group_first_block; else - start_block = goal + group_first_block; + start_block = grp_goal + group_first_block; size = my_rsv->rsv_goal_size; @@ -1057,14 +1057,15 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, * sorted double linked list should be fast. * */ -static int +static ext3_grpblk_t ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, unsigned int group, struct buffer_head *bitmap_bh, - int goal, struct ext3_reserve_window_node * my_rsv, + ext3_grpblk_t grp_goal, + struct ext3_reserve_window_node * my_rsv, unsigned long *count, int *errp) { - unsigned long group_first_block; - int ret = 0; + ext3_fsblk_t group_first_block; + ext3_grpblk_t ret = 0; int fatal; unsigned long num = *count; @@ -1090,17 +1091,16 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, */ if (my_rsv == NULL ) { ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, - goal, count, NULL); + grp_goal, count, NULL); goto out; } /* - * goal is a group relative block number (if there is a goal) - * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) + * grp_goal is a group relative block number (if there is a goal) + * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb) * first block is a filesystem wide block number * first block is the block number of the first block in this group */ - group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); /* * Basically we will allocate a new block from inode's reservation @@ -1119,24 +1119,24 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, */ while (1) { if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || - !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) { + !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) { if (my_rsv->rsv_goal_size < *count) my_rsv->rsv_goal_size = *count; - ret = alloc_new_reservation(my_rsv, goal, sb, + ret = alloc_new_reservation(my_rsv, grp_goal, sb, group, bitmap_bh); if (ret < 0) break; /* failed */ - if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) - goal = -1; - } else if (goal > 0 && (my_rsv->rsv_end-goal+1) < *count) + if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) + grp_goal = -1; + } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count) try_to_extend_reservation(my_rsv, sb, - *count-my_rsv->rsv_end + goal - 1); + *count-my_rsv->rsv_end + grp_goal - 1); if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) || (my_rsv->rsv_end < group_first_block)) BUG(); - ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal, &num, &my_rsv->rsv_window); if (ret >= 0) { my_rsv->rsv_alloc_hit += num; @@ -1164,7 +1164,7 @@ out: static int ext3_has_free_blocks(struct ext3_sb_info *sbi) { - int free_blocks, root_blocks; + ext3_fsblk_t free_blocks, root_blocks; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); @@ -1200,19 +1200,20 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries) * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */ -int ext3_new_blocks(handle_t *handle, struct inode *inode, - unsigned long goal, unsigned long *count, int *errp) +ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; int group_no; int goal_group; - int ret_block; + ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */ + ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ + ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */ int bgi; /* blockgroup iteration index */ - int target_block; int fatal = 0, err; int performed_allocation = 0; - int free_blocks; + ext3_grpblk_t free_blocks; /* number of free blocks in a group */ struct super_block *sb; struct ext3_group_desc *gdp; struct ext3_super_block *es; @@ -1285,16 +1286,17 @@ retry: my_rsv = NULL; if (free_blocks > 0) { - ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % + grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % EXT3_BLOCKS_PER_GROUP(sb)); bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, - bitmap_bh, ret_block, my_rsv, &num, &fatal); + grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, grp_target_blk, + my_rsv, &num, &fatal); if (fatal) goto out; - if (ret_block >= 0) + if (grp_alloc_blk >= 0) goto allocated; } @@ -1327,11 +1329,15 @@ retry: bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, - bitmap_bh, -1, my_rsv, &num, &fatal); + /* + * try to allocate block(s) from this group, without a goal(-1). + */ + grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, -1, my_rsv, + &num, &fatal); if (fatal) goto out; - if (ret_block >= 0) + if (grp_alloc_blk >= 0) goto allocated; } /* @@ -1360,18 +1366,18 @@ allocated: if (fatal) goto out; - target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb) - + le32_to_cpu(es->s_first_data_block); + ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); - if (in_range(le32_to_cpu(gdp->bg_block_bitmap), target_block, num) || - in_range(le32_to_cpu(gdp->bg_inode_bitmap), target_block, num) || - in_range(target_block, le32_to_cpu(gdp->bg_inode_table), + if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || + in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || + in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), EXT3_SB(sb)->s_itb_per_group) || - in_range(target_block + num - 1, le32_to_cpu(gdp->bg_inode_table), + in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), EXT3_SB(sb)->s_itb_per_group)) ext3_error(sb, "ext3_new_block", "Allocating block in system zone - " - "blocks from %u, length %lu", target_block, num); + "blocks from "E3FSBLK", length %lu", + ret_block, num); performed_allocation = 1; @@ -1380,7 +1386,7 @@ allocated: struct buffer_head *debug_bh; /* Record bitmap buffer state in the newly allocated block */ - debug_bh = sb_find_get_block(sb, target_block); + debug_bh = sb_find_get_block(sb, ret_block); if (debug_bh) { BUFFER_TRACE(debug_bh, "state when allocated"); BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); @@ -1393,24 +1399,21 @@ allocated: int i; for (i = 0; i < num; i++) { - if (ext3_test_bit(ret_block, + if (ext3_test_bit(grp_alloc_blk+i, bh2jh(bitmap_bh)->b_committed_data)) { printk("%s: block was unexpectedly set in " "b_committed_data\n", __FUNCTION__); } } } - ext3_debug("found bit %d\n", ret_block); + ext3_debug("found bit %d\n", grp_alloc_blk); spin_unlock(sb_bgl_lock(sbi, group_no)); jbd_unlock_bh_state(bitmap_bh); #endif - /* ret_block was blockgroup-relative. Now it becomes fs-relative */ - ret_block = target_block; - if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { ext3_error(sb, "ext3_new_block", - "block(%d) >= blocks count(%d) - " + "block("E3FSBLK") >= blocks count(%d) - " "block_group = %d, es == %p ", ret_block, le32_to_cpu(es->s_blocks_count), group_no, es); goto out; @@ -1421,7 +1424,7 @@ allocated: * list of some description. We don't know in advance whether * the caller wants to use it as metadata or data. */ - ext3_debug("allocating block %d. Goal hits %d of %d.\n", + ext3_debug("allocating block %lu. Goal hits %d of %d.\n", ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); @@ -1461,23 +1464,24 @@ out: return 0; } -int ext3_new_block(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) +ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp) { unsigned long count = 1; return ext3_new_blocks(handle, inode, goal, &count, errp); } -unsigned long ext3_count_free_blocks(struct super_block *sb) +ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) { - unsigned long desc_count; + ext3_fsblk_t desc_count; struct ext3_group_desc *gdp; int i; unsigned long ngroups = EXT3_SB(sb)->s_groups_count; #ifdef EXT3FS_DEBUG struct ext3_super_block *es; - unsigned long bitmap_count, x; + ext3_fsblk_t bitmap_count; + unsigned long x; struct buffer_head *bitmap_bh = NULL; es = EXT3_SB(sb)->s_es; @@ -1502,8 +1506,10 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) bitmap_count += x; } brelse(bitmap_bh); - printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); + printk("ext3_count_free_blocks: stored = "E3FSBLK + ", computed = "E3FSBLK", "E3FSBLK"\n", + le32_to_cpu(es->s_free_blocks_count), + desc_count, bitmap_count); return bitmap_count; #else desc_count = 0; @@ -1520,7 +1526,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) } static inline int -block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) +block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map) { return ext3_test_bit ((block - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index dc826464f313..36546ed36a14 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -262,9 +262,11 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) int ngroups = sbi->s_groups_count; int inodes_per_group = EXT3_INODES_PER_GROUP(sb); int freei, avefreei; - int freeb, avefreeb; - int blocks_per_dir, ndirs; - int max_debt, max_dirs, min_blocks, min_inodes; + ext3_fsblk_t freeb, avefreeb; + ext3_fsblk_t blocks_per_dir; + int ndirs; + int max_debt, max_dirs, min_inodes; + ext3_grpblk_t min_blocks; int group = -1, i; struct ext3_group_desc *desc; struct buffer_head *bh; @@ -307,7 +309,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) min_inodes = avefreei - inodes_per_group / 4; min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; - max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST); + max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST); if (max_debt * INODE_COST > inodes_per_group) max_debt = inodes_per_group / INODE_COST; if (max_debt > 255) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2edd7eec88fd..0321e1b9034a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -62,7 +62,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode) * still needs to be revoked. */ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, int blocknr) + struct buffer_head *bh, ext3_fsblk_t blocknr) { int err; @@ -407,13 +407,13 @@ no_block: * * Caller must make sure that @ind is valid and will stay that way. */ -static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) +static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) { struct ext3_inode_info *ei = EXT3_I(inode); __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; __le32 *p; - unsigned long bg_start; - unsigned long colour; + ext3_fsblk_t bg_start; + ext3_grpblk_t colour; /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { @@ -429,8 +429,7 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. */ - bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); + bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); colour = (current->pid % 16) * (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour; @@ -448,7 +447,7 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) * stores it in *@goal and returns zero. */ -static unsigned long ext3_find_goal(struct inode *inode, long block, +static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, Indirect chain[4], Indirect *partial) { struct ext3_block_alloc_info *block_i; @@ -516,13 +515,13 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, * direct blocks */ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, - unsigned long goal, int indirect_blks, int blks, - unsigned long long new_blocks[4], int *err) + ext3_fsblk_t goal, int indirect_blks, int blks, + ext3_fsblk_t new_blocks[4], int *err) { int target, i; unsigned long count = 0; int index = 0; - unsigned long current_block = 0; + ext3_fsblk_t current_block = 0; int ret = 0; /* @@ -592,7 +591,7 @@ failed_out: * as described above and return 0. */ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, - int indirect_blks, int *blks, unsigned long goal, + int indirect_blks, int *blks, ext3_fsblk_t goal, int *offsets, Indirect *branch) { int blocksize = inode->i_sb->s_blocksize; @@ -600,8 +599,8 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, int err = 0; struct buffer_head *bh; int num; - unsigned long long new_blocks[4]; - unsigned long long current_block; + ext3_fsblk_t new_blocks[4]; + ext3_fsblk_t current_block; num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, *blks, new_blocks, &err); @@ -688,7 +687,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, int i; int err = 0; struct ext3_block_alloc_info *block_i; - unsigned long current_block; + ext3_fsblk_t current_block; block_i = EXT3_I(inode)->i_block_alloc_info; /* @@ -795,13 +794,13 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, int offsets[4]; Indirect chain[4]; Indirect *partial; - unsigned long goal; + ext3_fsblk_t goal; int indirect_blks; int blocks_to_boundary = 0; int depth; struct ext3_inode_info *ei = EXT3_I(inode); int count = 0; - unsigned long first_block = 0; + ext3_fsblk_t first_block = 0; J_ASSERT(handle != NULL || create == 0); @@ -819,7 +818,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, count++; /*map more blocks*/ while (count < maxblocks && count <= blocks_to_boundary) { - unsigned long blk; + ext3_fsblk_t blk; if (!verify_chain(chain, partial)) { /* @@ -1759,7 +1758,7 @@ void ext3_set_aops(struct inode *inode) static int ext3_block_truncate_page(handle_t *handle, struct page *page, struct address_space *mapping, loff_t from) { - unsigned long index = from >> PAGE_CACHE_SHIFT; + ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize, iblock, length, pos; struct inode *inode = mapping->host; @@ -1960,7 +1959,7 @@ no_top: * than `count' because there can be holes in there. */ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, unsigned long block_to_free, + struct buffer_head *bh, ext3_fsblk_t block_to_free, unsigned long count, __le32 *first, __le32 *last) { __le32 *p; @@ -2022,12 +2021,12 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, struct buffer_head *this_bh, __le32 *first, __le32 *last) { - unsigned long block_to_free = 0; /* Starting block # of a run */ + ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ unsigned long count = 0; /* Number of blocks in the run */ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind corresponding to block_to_free */ - unsigned long nr; /* Current block # */ + ext3_fsblk_t nr; /* Current block # */ __le32 *p; /* Pointer into inode/ind for current block */ int err; @@ -2089,7 +2088,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, struct buffer_head *parent_bh, __le32 *first, __le32 *last, int depth) { - unsigned long nr; + ext3_fsblk_t nr; __le32 *p; if (is_handle_aborted(handle)) @@ -2113,7 +2112,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, */ if (!bh) { ext3_error(inode->i_sb, "ext3_free_branches", - "Read failure, inode=%ld, block=%ld", + "Read failure, inode=%ld, block="E3FSBLK, inode->i_ino, nr); continue; } @@ -2394,11 +2393,12 @@ out_stop: ext3_journal_stop(handle); } -static unsigned long ext3_get_inode_block(struct super_block *sb, +static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, unsigned long ino, struct ext3_iloc *iloc) { unsigned long desc, group_desc, block_group; - unsigned long offset, block; + unsigned long offset; + ext3_fsblk_t block; struct buffer_head *bh; struct ext3_group_desc * gdp; @@ -2448,7 +2448,7 @@ static unsigned long ext3_get_inode_block(struct super_block *sb, static int __ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc, int in_mem) { - unsigned long block; + ext3_fsblk_t block; struct buffer_head *bh; block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); @@ -2459,7 +2459,8 @@ static int __ext3_get_inode_loc(struct inode *inode, if (!bh) { ext3_error (inode->i_sb, "ext3_get_inode_loc", "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); + "inode=%lu, block="E3FSBLK, + inode->i_ino, block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -2540,7 +2541,7 @@ make_io: if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", "unable to read inode block - " - "inode=%lu, block=%lu", + "inode=%lu, block="E3FSBLK, inode->i_ino, block); brelse(bh); return -EIO; diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 8c22aa9a7fbb..3a6b012d120c 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -204,7 +204,7 @@ flags_err: return 0; } case EXT3_IOC_GROUP_EXTEND: { - unsigned long n_blocks_count; + ext3_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; int err; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index b8f5cd1e540d..d9176dba3698 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1379,7 +1379,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, int dx_fallback=0; #endif unsigned blocksize; - unsigned nlen, rlen; u32 block, blocks; sb = dir->i_sb; @@ -1417,8 +1416,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, return retval; de = (struct ext3_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = cpu_to_le16(rlen = blocksize); - nlen = 0; + de->rec_len = cpu_to_le16(blocksize); return add_dirent_to_buf(handle, dentry, inode, de, bh); } diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 34b39e9a1e5a..dfd811895d8f 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -28,16 +28,16 @@ static int verify_group_input(struct super_block *sb, { struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; - unsigned start = le32_to_cpu(es->s_blocks_count); - unsigned end = start + input->blocks_count; + ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count); + ext3_fsblk_t end = start + input->blocks_count; unsigned group = input->group; - unsigned itend = input->inode_table + sbi->s_itb_per_group; + ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; unsigned overhead = ext3_bg_has_super(sb, group) ? (1 + ext3_bg_num_gdb(sb, group) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; - unsigned metaend = start + overhead; + ext3_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; - int free_blocks_count; + ext3_grpblk_t free_blocks_count; int err = -EINVAL; input->free_blocks_count = free_blocks_count = @@ -64,7 +64,8 @@ static int verify_group_input(struct super_block *sb, ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)", + ext3_warning(sb, __FUNCTION__, + "Cannot read last block ("E3FSBLK")", end - 1); else if (outside(input->block_bitmap, start, end)) ext3_warning(sb, __FUNCTION__, @@ -77,7 +78,7 @@ static int verify_group_input(struct super_block *sb, else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) ext3_warning(sb, __FUNCTION__, - "Inode table not in group (blocks %u-%u)", + "Inode table not in group (blocks %u-"E3FSBLK")", input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) ext3_warning(sb, __FUNCTION__, @@ -85,24 +86,27 @@ static int verify_group_input(struct super_block *sb, input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) ext3_warning(sb, __FUNCTION__, - "Block bitmap (%u) in inode table (%u-%u)", + "Block bitmap (%u) in inode table (%u-"E3FSBLK")", input->block_bitmap, input->inode_table, itend-1); else if (inside(input->inode_bitmap, input->inode_table, itend)) ext3_warning(sb, __FUNCTION__, - "Inode bitmap (%u) in inode table (%u-%u)", + "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", input->inode_bitmap, input->inode_table, itend-1); else if (inside(input->block_bitmap, start, metaend)) ext3_warning(sb, __FUNCTION__, - "Block bitmap (%u) in GDT table (%u-%u)", + "Block bitmap (%u) in GDT table" + " ("E3FSBLK"-"E3FSBLK")", input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) ext3_warning(sb, __FUNCTION__, - "Inode bitmap (%u) in GDT table (%u-%u)", + "Inode bitmap (%u) in GDT table" + " ("E3FSBLK"-"E3FSBLK")", input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) ext3_warning(sb, __FUNCTION__, - "Inode table (%u-%u) overlaps GDT table (%u-%u)", + "Inode table (%u-"E3FSBLK") overlaps" + "GDT table ("E3FSBLK"-"E3FSBLK")", input->inode_table, itend - 1, start, metaend - 1); else err = 0; @@ -112,7 +116,7 @@ static int verify_group_input(struct super_block *sb, } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, - unsigned long blk) + ext3_fsblk_t blk) { struct buffer_head *bh; int err; @@ -163,15 +167,14 @@ static int setup_new_group_blocks(struct super_block *sb, struct ext3_new_group_data *input) { struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long start = input->group * sbi->s_blocks_per_group + - le32_to_cpu(sbi->s_es->s_first_data_block); + ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); int reserved_gdb = ext3_bg_has_super(sb, input->group) ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); struct buffer_head *bh; handle_t *handle; - unsigned long block; - int bit; + ext3_fsblk_t block; + ext3_grpblk_t bit; int i; int err = 0, err2; @@ -328,7 +331,7 @@ static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, static int verify_reserved_gdb(struct super_block *sb, struct buffer_head *primary) { - const unsigned long blk = primary->b_blocknr; + const ext3_fsblk_t blk = primary->b_blocknr; const unsigned long end = EXT3_SB(sb)->s_groups_count; unsigned three = 1; unsigned five = 5; @@ -340,7 +343,8 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ ext3_warning(sb, __FUNCTION__, - "reserved GDT %ld missing grp %d (%ld)", + "reserved GDT "E3FSBLK + " missing grp %d ("E3FSBLK")", blk, grp, grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); return -EINVAL; @@ -372,7 +376,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, struct super_block *sb = inode->i_sb; struct ext3_super_block *es = EXT3_SB(sb)->s_es; unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); - unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; + ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc; struct buffer_head *dind; int gdbackups; @@ -417,7 +421,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__u32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { ext3_warning(sb, __FUNCTION__, - "new group %u GDT block %lu not reserved", + "new group %u GDT block "E3FSBLK" not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -515,7 +519,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, struct buffer_head **primary; struct buffer_head *dind; struct ext3_iloc iloc; - unsigned long blk; + ext3_fsblk_t blk; __u32 *data, *end; int gdbackups = 0; int res, i; @@ -540,7 +544,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { ext3_warning(sb, __FUNCTION__, - "reserved block %lu not at offset %ld", + "reserved block "E3FSBLK + " not at offset %ld", blk, (long)(data - (__u32 *)dind->b_data)); err = -EINVAL; goto exit_bh; @@ -902,15 +907,16 @@ exit_put: * GDT blocks are reserved to grow to the desired size. */ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, - unsigned long n_blocks_count) + ext3_fsblk_t n_blocks_count) { - unsigned long o_blocks_count; + ext3_fsblk_t o_blocks_count; unsigned long o_groups_count; - unsigned long last; - int add; + ext3_grpblk_t last; + ext3_grpblk_t add; struct buffer_head * bh; handle_t *handle; - int err, freed_blocks; + int err; + unsigned long freed_blocks; /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after @@ -919,12 +925,22 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, o_groups_count = EXT3_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n", + printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) return 0; + if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT3-fs: filesystem on %s:" + " too large to resize to %lu blocks safely\n", + sb->s_id, n_blocks_count); + if (sizeof(sector_t) < 8) + ext3_warning(sb, __FUNCTION__, + "CONFIG_LBD not enabled\n"); + return -EINVAL; + } + if (n_blocks_count < o_blocks_count) { ext3_warning(sb, __FUNCTION__, "can't shrink FS - resize aborted"); @@ -948,7 +964,8 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, if (o_blocks_count + add < n_blocks_count) ext3_warning(sb, __FUNCTION__, - "will only finish group (%lu blocks, %u new)", + "will only finish group ("E3FSBLK + " blocks, %u new)", o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ @@ -991,10 +1008,10 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); - ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count, + ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); - ext3_debug("freed blocks %ld through %ld\n", o_blocks_count, + ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); if ((err = ext3_journal_stop(handle))) goto exit_put; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f8a5266ea1ff..b2891cc29db1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -58,7 +58,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait); static const char *ext3_decode_error(struct super_block * sb, int errno, char nbuf[16]); static int ext3_remount (struct super_block * sb, int * flags, char * data); -static int ext3_statfs (struct super_block * sb, struct kstatfs * buf); +static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); static void ext3_unlockfs(struct super_block *sb); static void ext3_write_super (struct super_block * sb); static void ext3_write_super_lockfs(struct super_block *sb); @@ -499,20 +499,21 @@ static void ext3_clear_inode(struct inode *inode) { struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; #ifdef CONFIG_EXT3_FS_POSIX_ACL - if (EXT3_I(inode)->i_acl && - EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_acl); - EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; - } - if (EXT3_I(inode)->i_default_acl && - EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_default_acl); - EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; - } + if (EXT3_I(inode)->i_acl && + EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { + posix_acl_release(EXT3_I(inode)->i_acl); + EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; + } + if (EXT3_I(inode)->i_default_acl && + EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { + posix_acl_release(EXT3_I(inode)->i_default_acl); + EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; + } #endif ext3_discard_reservation(inode); EXT3_I(inode)->i_block_alloc_info = NULL; - kfree(rsv); + if (unlikely(rsv)) + kfree(rsv); } static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) @@ -688,14 +689,15 @@ static match_table_t tokens = { {Opt_resize, "resize"}, }; -static unsigned long get_sb_block(void **data) +static ext3_fsblk_t get_sb_block(void **data) { - unsigned long sb_block; + ext3_fsblk_t sb_block; char *options = (char *) *data; if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ options += 3; + /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { printk("EXT3-fs: Invalid sb specification: %s\n", @@ -710,7 +712,7 @@ static unsigned long get_sb_block(void **data) static int parse_options (char *options, struct super_block *sb, unsigned long *inum, unsigned long *journal_devnum, - unsigned long *n_blocks_count, int is_remount) + ext3_fsblk_t *n_blocks_count, int is_remount) { struct ext3_sb_info *sbi = EXT3_SB(sb); char * p; @@ -1127,7 +1129,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, static int ext3_check_descriptors (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); + ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block); struct ext3_group_desc * gdp = NULL; int desc_block = 0; int i; @@ -1314,15 +1316,14 @@ static loff_t ext3_max_size(int bits) return res; } -static unsigned long descriptor_loc(struct super_block *sb, - unsigned long logic_sb_block, +static ext3_fsblk_t descriptor_loc(struct super_block *sb, + ext3_fsblk_t logic_sb_block, int nr) { struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long bg, first_data_block, first_meta_bg; + unsigned long bg, first_meta_bg; int has_super = 0; - first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block); first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || @@ -1331,7 +1332,7 @@ static unsigned long descriptor_loc(struct super_block *sb, bg = sbi->s_desc_per_block * nr; if (ext3_bg_has_super(sb, bg)) has_super = 1; - return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); + return (has_super + ext3_group_first_block_no(sb, bg)); } @@ -1340,9 +1341,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) struct buffer_head * bh; struct ext3_super_block *es = NULL; struct ext3_sb_info *sbi; - unsigned long block; - unsigned long sb_block = get_sb_block(&data); - unsigned long logic_sb_block; + ext3_fsblk_t block; + ext3_fsblk_t sb_block = get_sb_block(&data); + ext3_fsblk_t logic_sb_block; unsigned long offset = 0; unsigned long journal_inum = 0; unsigned long journal_devnum = 0; @@ -1564,6 +1565,16 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } + if (le32_to_cpu(es->s_blocks_count) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT3-fs: filesystem on %s:" + " too large to mount safely\n", sb->s_id); + if (sizeof(sector_t) < 8) + printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not " + "enabled\n"); + goto failed_mount; + } + if (EXT3_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext3; sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - @@ -1579,9 +1590,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - percpu_counter_init(&sbi->s_freeblocks_counter); - percpu_counter_init(&sbi->s_freeinodes_counter); - percpu_counter_init(&sbi->s_dirs_counter); bgl_lock_init(&sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -1595,12 +1603,20 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) } } if (!ext3_check_descriptors (sb)) { - printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n"); + printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n"); goto failed_mount2; } sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + + percpu_counter_init(&sbi->s_freeblocks_counter, + ext3_count_free_blocks(sb)); + percpu_counter_init(&sbi->s_freeinodes_counter, + ext3_count_free_inodes(sb)); + percpu_counter_init(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + /* per fileystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); sbi->s_rsv_window_root = RB_ROOT; @@ -1639,16 +1655,16 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (!test_opt(sb, NOLOAD) && EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount2; + goto failed_mount3; } else if (journal_inum) { if (ext3_create_journal(sb, es, journal_inum)) - goto failed_mount2; + goto failed_mount3; } else { if (!silent) printk (KERN_ERR "ext3: No journal on filesystem on %s\n", sb->s_id); - goto failed_mount2; + goto failed_mount3; } /* We have now updated the journal if required, so we can @@ -1671,7 +1687,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { printk(KERN_ERR "EXT3-fs: Journal does not support " "requested data journaling mode\n"); - goto failed_mount3; + goto failed_mount4; } default: break; @@ -1694,13 +1710,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (!sb->s_root) { printk(KERN_ERR "EXT3-fs: get root inode failed\n"); iput(root); - goto failed_mount3; + goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { dput(sb->s_root); sb->s_root = NULL; printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); - goto failed_mount3; + goto failed_mount4; } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -1723,13 +1739,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": "writeback"); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); - percpu_counter_mod(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - lock_kernel(); return 0; @@ -1739,8 +1748,12 @@ cantfind_ext3: sb->s_id); goto failed_mount; -failed_mount3: +failed_mount4: journal_destroy(sbi->s_journal); +failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); @@ -1827,10 +1840,10 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, { struct buffer_head * bh; journal_t *journal; - int start; - int len; + ext3_fsblk_t start; + ext3_fsblk_t len; int hblock, blocksize; - unsigned long sb_block; + ext3_fsblk_t sb_block; unsigned long offset; struct ext3_super_block * es; struct block_device *bdev; @@ -2203,7 +2216,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) { struct ext3_super_block * es; struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long n_blocks_count = 0; + ext3_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; struct ext3_mount_options old_opts; int err; @@ -2318,11 +2331,12 @@ restore_opts: return err; } -static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) +static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) { + struct super_block *sb = dentry->d_sb; struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; - unsigned long overhead; + ext3_fsblk_t overhead; int i; if (test_opt (sb, MINIX_DF)) @@ -2646,10 +2660,10 @@ out: #endif -static struct super_block *ext3_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ext3_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); } static struct file_system_type ext3_fs_type = { diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index e8d60bf6b7df..a44a0562203a 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -225,7 +225,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, error = -ENODATA; if (!EXT3_I(inode)->i_file_acl) goto cleanup; - ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); + ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); if (!bh) goto cleanup; @@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext3_xattr_check_block(bh)) { bad_block: ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: bad block %d", inode->i_ino, + "inode %ld: bad block "E3FSBLK, inode->i_ino, EXT3_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -366,7 +366,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) error = 0; if (!EXT3_I(inode)->i_file_acl) goto cleanup; - ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); + ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); error = -EIO; if (!bh) @@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext3_xattr_check_block(bh)) { ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: bad block %d", inode->i_ino, + "inode %ld: bad block "E3FSBLK, inode->i_ino, EXT3_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i, le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext3_xattr_check_block(bs->bh)) { ext3_error(sb, __FUNCTION__, - "inode %ld: bad block %d", inode->i_ino, + "inode %ld: bad block "E3FSBLK, inode->i_ino, EXT3_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -792,11 +792,12 @@ inserted: get_bh(new_bh); } else { /* We need to allocate a new block */ - int goal = le32_to_cpu( + ext3_fsblk_t goal = le32_to_cpu( EXT3_SB(sb)->s_es->s_first_data_block) + - EXT3_I(inode)->i_block_group * + (ext3_fsblk_t)EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); - int block = ext3_new_block(handle, inode, goal, &error); + ext3_fsblk_t block = ext3_new_block(handle, inode, + goal, &error); if (error) goto cleanup; ea_idebug(inode, "creating block %d", block); @@ -847,7 +848,7 @@ cleanup_dquot: bad_block: ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: bad block %d", inode->i_ino, + "inode %ld: bad block "E3FSBLK, inode->i_ino, EXT3_I(inode)->i_file_acl); goto cleanup; @@ -1076,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); if (!bh) { ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: block %d read error", inode->i_ino, + "inode %ld: block "E3FSBLK" read error", inode->i_ino, EXT3_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: bad block %d", inode->i_ino, + "inode %ld: bad block "E3FSBLK, inode->i_ino, EXT3_I(inode)->i_file_acl); goto cleanup; } @@ -1210,11 +1211,11 @@ again: bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: block %ld read error", + "inode %ld: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= EXT3_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>=%d", + ea_idebug(inode, "block %lu refcount %d>=%d", (unsigned long) ce->e_block, le32_to_cpu(BHDR(bh)->h_refcount), EXT3_XATTR_REFCOUNT_MAX); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c1ce284f8a94..7c35d582ec10 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -539,18 +539,18 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) return 0; } -static int fat_statfs(struct super_block *sb, struct kstatfs *buf) +static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); /* If the count of free cluster is still unknown, counts it here. */ if (sbi->free_clusters == -1) { - int err = fat_count_free_clusters(sb); + int err = fat_count_free_clusters(dentry->d_sb); if (err) return err; } - buf->f_type = sb->s_magic; + buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = sbi->cluster_size; buf->f_blocks = sbi->max_cluster - FAT_START_ENT; buf->f_bfree = sbi->free_clusters; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 944652e9dde1..308f2b6b5026 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -210,4 +210,3 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) return err; } -EXPORT_SYMBOL_GPL(fat_sync_bhs); diff --git a/fs/file_table.c b/fs/file_table.c index bcea1998b4de..506d5307108d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -300,5 +300,5 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); - percpu_counter_init(&nr_files); + percpu_counter_init(&nr_files, 0); } diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h index 583bd78086d8..d35979a58743 100644 --- a/fs/freevxfs/vxfs.h +++ b/fs/freevxfs/vxfs.h @@ -159,11 +159,11 @@ struct vxfs_sb { * In core superblock filesystem private data for VxFS. */ struct vxfs_sb_info { - struct vxfs_sb *vsi_raw; /* raw (on disk) supeblock */ + struct vxfs_sb *vsi_raw; /* raw (on disk) superblock */ struct buffer_head *vsi_bp; /* buffer for raw superblock*/ struct inode *vsi_fship; /* fileset header inode */ struct inode *vsi_ilist; /* inode list inode */ - struct inode *vsi_stilist; /* structual inode list inode */ + struct inode *vsi_stilist; /* structural inode list inode */ u_long vsi_iext; /* initial inode list */ ino_t vsi_fshino; /* fileset header inode */ daddr_t vsi_oltext; /* OLT extent */ diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c index 6dee109aeea4..78948b4b1894 100644 --- a/fs/freevxfs/vxfs_fshead.c +++ b/fs/freevxfs/vxfs_fshead.c @@ -112,7 +112,7 @@ vxfs_read_fshead(struct super_block *sbp) vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino); if (!vip) { - printk(KERN_ERR "vxfs: unabled to read fsh inode\n"); + printk(KERN_ERR "vxfs: unable to read fsh inode\n"); return -EINVAL; } if (!VXFS_ISFSH(vip)) { @@ -129,13 +129,13 @@ vxfs_read_fshead(struct super_block *sbp) infp->vsi_fship = vxfs_get_fake_inode(sbp, vip); if (!infp->vsi_fship) { - printk(KERN_ERR "vxfs: unabled to get fsh inode\n"); + printk(KERN_ERR "vxfs: unable to get fsh inode\n"); goto out_free_fship; } sfp = vxfs_getfsh(infp->vsi_fship, 0); if (!sfp) { - printk(KERN_ERR "vxfs: unabled to get structural fsh\n"); + printk(KERN_ERR "vxfs: unable to get structural fsh\n"); goto out_iput_fship; } @@ -145,7 +145,7 @@ vxfs_read_fshead(struct super_block *sbp) pfp = vxfs_getfsh(infp->vsi_fship, 1); if (!pfp) { - printk(KERN_ERR "vxfs: unabled to get primary fsh\n"); + printk(KERN_ERR "vxfs: unable to get primary fsh\n"); goto out_free_sfp; } @@ -159,7 +159,7 @@ vxfs_read_fshead(struct super_block *sbp) infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip); if (!infp->vsi_stilist) { - printk(KERN_ERR "vxfs: unabled to get structual list inode\n"); + printk(KERN_ERR "vxfs: unable to get structural list inode\n"); kfree(tip); goto out_free_pfp; } @@ -174,7 +174,7 @@ vxfs_read_fshead(struct super_block *sbp) goto out_iput_stilist; infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip); if (!infp->vsi_ilist) { - printk(KERN_ERR "vxfs: unabled to get inode list inode\n"); + printk(KERN_ERR "vxfs: unable to get inode list inode\n"); kfree(tip); goto out_iput_stilist; } diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index 50aae77651b2..c1be118fc067 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -71,8 +71,7 @@ vxfs_get_page(struct address_space *mapping, u_long n) { struct page * pp; - pp = read_cache_page(mapping, n, - (filler_t*)mapping->a_ops->readpage, NULL); + pp = read_mapping_page(mapping, n, NULL); if (!IS_ERR(pp)) { wait_on_page_locked(pp); diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index b44c916d24a1..b74b791fc23b 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -40,6 +40,7 @@ #include <linux/slab.h> #include <linux/stat.h> #include <linux/vfs.h> +#include <linux/mount.h> #include "vxfs.h" #include "vxfs_extern.h" @@ -55,7 +56,7 @@ MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */ static void vxfs_put_super(struct super_block *); -static int vxfs_statfs(struct super_block *, struct kstatfs *); +static int vxfs_statfs(struct dentry *, struct kstatfs *); static int vxfs_remount(struct super_block *, int *, char *); static struct super_operations vxfs_super_ops = { @@ -90,12 +91,12 @@ vxfs_put_super(struct super_block *sbp) /** * vxfs_statfs - get filesystem information - * @sbp: VFS superblock + * @dentry: VFS dentry to locate superblock * @bufp: output buffer * * Description: * vxfs_statfs fills the statfs buffer @bufp with information - * about the filesystem described by @sbp. + * about the filesystem described by @dentry. * * Returns: * Zero. @@ -107,12 +108,12 @@ vxfs_put_super(struct super_block *sbp) * This is everything but complete... */ static int -vxfs_statfs(struct super_block *sbp, struct kstatfs *bufp) +vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) { - struct vxfs_sb_info *infp = VXFS_SBI(sbp); + struct vxfs_sb_info *infp = VXFS_SBI(dentry->d_sb); bufp->f_type = VXFS_SUPER_MAGIC; - bufp->f_bsize = sbp->s_blocksize; + bufp->f_bsize = dentry->d_sb->s_blocksize; bufp->f_blocks = infp->vsi_raw->vs_dsize; bufp->f_bfree = infp->vsi_raw->vs_free; bufp->f_bavail = 0; @@ -241,10 +242,11 @@ out: /* * The usual module blurb. */ -static struct super_block *vxfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int vxfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, + mnt); } static struct file_system_type vxfs_fs_type = { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f3fbe2d030f4..031b27a4bc9a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -461,6 +461,8 @@ void sync_inodes_sb(struct super_block *sb, int wait) { struct writeback_control wbc = { .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, + .range_start = 0, + .range_end = LLONG_MAX, }; unsigned long nr_dirty = read_page_state(nr_dirty); unsigned long nr_unstable = read_page_state(nr_unstable); @@ -559,6 +561,8 @@ int write_inode_now(struct inode *inode, int sync) struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = WB_SYNC_ALL, + .range_start = 0, + .range_end = LLONG_MAX, }; if (!mapping_cap_writeback_dirty(inode->i_mapping)) @@ -619,7 +623,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int int need_write_inode_now = 0; int err2; - current->flags |= PF_SYNCWRITE; if (what & OSYNC_DATA) err = filemap_fdatawrite(mapping); if (what & (OSYNC_METADATA|OSYNC_DATA)) { @@ -632,7 +635,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int if (!err) err = err2; } - current->flags &= ~PF_SYNCWRITE; spin_lock(&inode_lock); if ((inode->i_state & I_DIRTY) && diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index c3e1f760cac9..72437065f6ad 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_FUSE_FS) += fuse.o -fuse-objs := dev.o dir.o file.o inode.o +fuse-objs := dev.o dir.o file.o inode.o control.o diff --git a/fs/fuse/control.c b/fs/fuse/control.c new file mode 100644 index 000000000000..a3bce3a77253 --- /dev/null +++ b/fs/fuse/control.c @@ -0,0 +1,218 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/init.h> +#include <linux/module.h> + +#define FUSE_CTL_SUPER_MAGIC 0x65735543 + +/* + * This is non-NULL when the single instance of the control filesystem + * exists. Protected by fuse_mutex + */ +static struct super_block *fuse_control_sb; + +static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file) +{ + struct fuse_conn *fc; + mutex_lock(&fuse_mutex); + fc = file->f_dentry->d_inode->u.generic_ip; + if (fc) + fc = fuse_conn_get(fc); + mutex_unlock(&fuse_mutex); + return fc; +} + +static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (fc) { + fuse_abort_conn(fc); + fuse_conn_put(fc); + } + return count; +} + +static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + char tmp[32]; + size_t size; + + if (!*ppos) { + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (!fc) + return 0; + + file->private_data=(void *)(long)atomic_read(&fc->num_waiting); + fuse_conn_put(fc); + } + size = sprintf(tmp, "%ld\n", (long)file->private_data); + return simple_read_from_buffer(buf, len, ppos, tmp, size); +} + +static const struct file_operations fuse_ctl_abort_ops = { + .open = nonseekable_open, + .write = fuse_conn_abort_write, +}; + +static const struct file_operations fuse_ctl_waiting_ops = { + .open = nonseekable_open, + .read = fuse_conn_waiting_read, +}; + +static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, + struct fuse_conn *fc, + const char *name, + int mode, int nlink, + struct inode_operations *iop, + const struct file_operations *fop) +{ + struct dentry *dentry; + struct inode *inode; + + BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES); + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + fc->ctl_dentry[fc->ctl_ndents++] = dentry; + inode = new_inode(fuse_control_sb); + if (!inode) + return NULL; + + inode->i_mode = mode; + inode->i_uid = fc->user_id; + inode->i_gid = fc->group_id; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + /* setting ->i_op to NULL is not allowed */ + if (iop) + inode->i_op = iop; + inode->i_fop = fop; + inode->i_nlink = nlink; + inode->u.generic_ip = fc; + d_add(dentry, inode); + return dentry; +} + +/* + * Add a connection to the control filesystem (if it exists). Caller + * must host fuse_mutex + */ +int fuse_ctl_add_conn(struct fuse_conn *fc) +{ + struct dentry *parent; + char name[32]; + + if (!fuse_control_sb) + return 0; + + parent = fuse_control_sb->s_root; + parent->d_inode->i_nlink++; + sprintf(name, "%llu", (unsigned long long) fc->id); + parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, + &simple_dir_inode_operations, + &simple_dir_operations); + if (!parent) + goto err; + + if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, + NULL, &fuse_ctl_waiting_ops) || + !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, + NULL, &fuse_ctl_abort_ops)) + goto err; + + return 0; + + err: + fuse_ctl_remove_conn(fc); + return -ENOMEM; +} + +/* + * Remove a connection from the control filesystem (if it exists). + * Caller must host fuse_mutex + */ +void fuse_ctl_remove_conn(struct fuse_conn *fc) +{ + int i; + + if (!fuse_control_sb) + return; + + for (i = fc->ctl_ndents - 1; i >= 0; i--) { + struct dentry *dentry = fc->ctl_dentry[i]; + dentry->d_inode->u.generic_ip = NULL; + d_drop(dentry); + dput(dentry); + } + fuse_control_sb->s_root->d_inode->i_nlink--; +} + +static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) +{ + struct tree_descr empty_descr = {""}; + struct fuse_conn *fc; + int err; + + err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr); + if (err) + return err; + + mutex_lock(&fuse_mutex); + BUG_ON(fuse_control_sb); + fuse_control_sb = sb; + list_for_each_entry(fc, &fuse_conn_list, entry) { + err = fuse_ctl_add_conn(fc); + if (err) { + fuse_control_sb = NULL; + mutex_unlock(&fuse_mutex); + return err; + } + } + mutex_unlock(&fuse_mutex); + + return 0; +} + +static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + return get_sb_single(fs_type, flags, raw_data, + fuse_ctl_fill_super, mnt); +} + +static void fuse_ctl_kill_sb(struct super_block *sb) +{ + mutex_lock(&fuse_mutex); + fuse_control_sb = NULL; + mutex_unlock(&fuse_mutex); + + kill_litter_super(sb); +} + +static struct file_system_type fuse_ctl_fs_type = { + .owner = THIS_MODULE, + .name = "fusectl", + .get_sb = fuse_ctl_get_sb, + .kill_sb = fuse_ctl_kill_sb, +}; + +int __init fuse_ctl_init(void) +{ + return register_filesystem(&fuse_ctl_fs_type); +} + +void fuse_ctl_cleanup(void) +{ + unregister_filesystem(&fuse_ctl_fs_type); +} diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 104a62dadb94..1e2006caf158 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -34,6 +34,7 @@ static void fuse_request_init(struct fuse_req *req) { memset(req, 0, sizeof(*req)); INIT_LIST_HEAD(&req->list); + INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); atomic_set(&req->count, 1); } @@ -64,18 +65,6 @@ static void restore_sigs(sigset_t *oldset) sigprocmask(SIG_SETMASK, oldset, NULL); } -/* - * Reset request, so that it can be reused - * - * The caller must be _very_ careful to make sure, that it is holding - * the only reference to req - */ -void fuse_reset_request(struct fuse_req *req) -{ - BUG_ON(atomic_read(&req->count) != 1); - fuse_request_init(req); -} - static void __fuse_get_request(struct fuse_req *req) { atomic_inc(&req->count); @@ -88,6 +77,13 @@ static void __fuse_put_request(struct fuse_req *req) atomic_dec(&req->count); } +static void fuse_req_init_context(struct fuse_req *req) +{ + req->in.h.uid = current->fsuid; + req->in.h.gid = current->fsgid; + req->in.h.pid = current->pid; +} + struct fuse_req *fuse_get_req(struct fuse_conn *fc) { struct fuse_req *req; @@ -103,14 +99,16 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) if (intr) goto out; + err = -ENOTCONN; + if (!fc->connected) + goto out; + req = fuse_request_alloc(); err = -ENOMEM; if (!req) goto out; - req->in.h.uid = current->fsuid; - req->in.h.gid = current->fsgid; - req->in.h.pid = current->pid; + fuse_req_init_context(req); req->waiting = 1; return req; @@ -119,142 +117,183 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) return ERR_PTR(err); } -void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) +/* + * Return request in fuse_file->reserved_req. However that may + * currently be in use. If that is the case, wait for it to become + * available. + */ +static struct fuse_req *get_reserved_req(struct fuse_conn *fc, + struct file *file) { - if (atomic_dec_and_test(&req->count)) { - if (req->waiting) - atomic_dec(&fc->num_waiting); - fuse_request_free(req); - } + struct fuse_req *req = NULL; + struct fuse_file *ff = file->private_data; + + do { + wait_event(fc->blocked_waitq, ff->reserved_req); + spin_lock(&fc->lock); + if (ff->reserved_req) { + req = ff->reserved_req; + ff->reserved_req = NULL; + get_file(file); + req->stolen_file = file; + } + spin_unlock(&fc->lock); + } while (!req); + + return req; } /* - * Called with sbput_sem held for read (request_end) or write - * (fuse_put_super). By the time fuse_put_super() is finished, all - * inodes belonging to background requests must be released, so the - * iputs have to be done within the locked region. + * Put stolen request back into fuse_file->reserved_req */ -void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) +static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) { - iput(req->inode); - iput(req->inode2); + struct file *file = req->stolen_file; + struct fuse_file *ff = file->private_data; + spin_lock(&fc->lock); - list_del(&req->bg_entry); - if (fc->num_background == FUSE_MAX_BACKGROUND) { - fc->blocked = 0; - wake_up_all(&fc->blocked_waitq); - } - fc->num_background--; + fuse_request_init(req); + BUG_ON(ff->reserved_req); + ff->reserved_req = req; + wake_up(&fc->blocked_waitq); spin_unlock(&fc->lock); + fput(file); } /* - * This function is called when a request is finished. Either a reply - * has arrived or it was interrupted (and not yet sent) or some error - * occurred during communication with userspace, or the device file - * was closed. In case of a background request the reference to the - * stored objects are released. The requester thread is woken up (if - * still waiting), the 'end' callback is called if given, else the - * reference to the request is released + * Gets a requests for a file operation, always succeeds * - * Releasing extra reference for foreground requests must be done - * within the same locked region as setting state to finished. This - * is because fuse_reset_request() may be called after request is - * finished and it must be the sole possessor. If request is - * interrupted and put in the background, it will return with an error - * and hence never be reset and reused. + * This is used for sending the FLUSH request, which must get to + * userspace, due to POSIX locks which may need to be unlocked. * - * Called with fc->lock, unlocks it + * If allocation fails due to OOM, use the reserved request in + * fuse_file. + * + * This is very unlikely to deadlock accidentally, since the + * filesystem should not have it's own file open. If deadlock is + * intentional, it can still be broken by "aborting" the filesystem. */ -static void request_end(struct fuse_conn *fc, struct fuse_req *req) +struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) { - list_del(&req->list); - req->state = FUSE_REQ_FINISHED; - if (!req->background) { - spin_unlock(&fc->lock); - wake_up(&req->waitq); - fuse_put_request(fc, req); - } else { - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - req->end = NULL; - spin_unlock(&fc->lock); - down_read(&fc->sbput_sem); - if (fc->mounted) - fuse_release_background(fc, req); - up_read(&fc->sbput_sem); + struct fuse_req *req; - /* fput must go outside sbput_sem, otherwise it can deadlock */ - if (req->file) - fput(req->file); + atomic_inc(&fc->num_waiting); + wait_event(fc->blocked_waitq, !fc->blocked); + req = fuse_request_alloc(); + if (!req) + req = get_reserved_req(fc, file); - if (end) - end(fc, req); + fuse_req_init_context(req); + req->waiting = 1; + return req; +} + +void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) +{ + if (atomic_dec_and_test(&req->count)) { + if (req->waiting) + atomic_dec(&fc->num_waiting); + + if (req->stolen_file) + put_reserved_req(fc, req); else - fuse_put_request(fc, req); + fuse_request_free(req); } } /* - * Unfortunately request interruption not just solves the deadlock - * problem, it causes problems too. These stem from the fact, that an - * interrupted request is continued to be processed in userspace, - * while all the locks and object references (inode and file) held - * during the operation are released. - * - * To release the locks is exactly why there's a need to interrupt the - * request, so there's not a lot that can be done about this, except - * introduce additional locking in userspace. - * - * More important is to keep inode and file references until userspace - * has replied, otherwise FORGET and RELEASE could be sent while the - * inode/file is still used by the filesystem. - * - * For this reason the concept of "background" request is introduced. - * An interrupted request is backgrounded if it has been already sent - * to userspace. Backgrounding involves getting an extra reference to - * inode(s) or file used in the request, and adding the request to - * fc->background list. When a reply is received for a background - * request, the object references are released, and the request is - * removed from the list. If the filesystem is unmounted while there - * are still background requests, the list is walked and references - * are released as if a reply was received. + * This function is called when a request is finished. Either a reply + * has arrived or it was aborted (and not yet sent) or some error + * occurred during communication with userspace, or the device file + * was closed. The requester thread is woken up (if still waiting), + * the 'end' callback is called if given, else the reference to the + * request is released * - * There's one more use for a background request. The RELEASE message is - * always sent as background, since it doesn't return an error or - * data. + * Called with fc->lock, unlocks it */ -static void background_request(struct fuse_conn *fc, struct fuse_req *req) -{ - req->background = 1; - list_add(&req->bg_entry, &fc->background); - fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) - fc->blocked = 1; - if (req->inode) - req->inode = igrab(req->inode); - if (req->inode2) - req->inode2 = igrab(req->inode2); +static void request_end(struct fuse_conn *fc, struct fuse_req *req) +{ + void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; + req->end = NULL; + list_del(&req->list); + list_del(&req->intr_entry); + req->state = FUSE_REQ_FINISHED; + if (req->background) { + if (fc->num_background == FUSE_MAX_BACKGROUND) { + fc->blocked = 0; + wake_up_all(&fc->blocked_waitq); + } + fc->num_background--; + } + spin_unlock(&fc->lock); + dput(req->dentry); + mntput(req->vfsmount); if (req->file) - get_file(req->file); + fput(req->file); + wake_up(&req->waitq); + if (end) + end(fc, req); + else + fuse_put_request(fc, req); } -/* Called with fc->lock held. Releases, and then reacquires it. */ -static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) +static void wait_answer_interruptible(struct fuse_conn *fc, + struct fuse_req *req) { - sigset_t oldset; + if (signal_pending(current)) + return; spin_unlock(&fc->lock); - block_sigs(&oldset); wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); - restore_sigs(&oldset); spin_lock(&fc->lock); - if (req->state == FUSE_REQ_FINISHED && !req->interrupted) - return; +} + +static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) +{ + list_add_tail(&req->intr_entry, &fc->interrupts); + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); +} + +/* Called with fc->lock held. Releases, and then reacquires it. */ +static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) +{ + if (!fc->no_interrupt) { + /* Any signal may interrupt this */ + wait_answer_interruptible(fc, req); + + if (req->aborted) + goto aborted; + if (req->state == FUSE_REQ_FINISHED) + return; - if (!req->interrupted) { - req->out.h.error = -EINTR; req->interrupted = 1; + if (req->state == FUSE_REQ_SENT) + queue_interrupt(fc, req); + } + + if (req->force) { + spin_unlock(&fc->lock); + wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); + spin_lock(&fc->lock); + } else { + sigset_t oldset; + + /* Only fatal signals may interrupt this */ + block_sigs(&oldset); + wait_answer_interruptible(fc, req); + restore_sigs(&oldset); } + + if (req->aborted) + goto aborted; + if (req->state == FUSE_REQ_FINISHED) + return; + + req->out.h.error = -EINTR; + req->aborted = 1; + + aborted: if (req->locked) { /* This is uninterruptible sleep, because data is being copied to/from the buffers of req. During @@ -268,8 +307,11 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) if (req->state == FUSE_REQ_PENDING) { list_del(&req->list); __fuse_put_request(req); - } else if (req->state == FUSE_REQ_SENT) - background_request(fc, req); + } else if (req->state == FUSE_REQ_SENT) { + spin_unlock(&fc->lock); + wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); + spin_lock(&fc->lock); + } } static unsigned len_args(unsigned numargs, struct fuse_arg *args) @@ -283,13 +325,19 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) return nbytes; } +static u64 fuse_get_unique(struct fuse_conn *fc) + { + fc->reqctr++; + /* zero is special */ + if (fc->reqctr == 0) + fc->reqctr = 1; + + return fc->reqctr; +} + static void queue_request(struct fuse_conn *fc, struct fuse_req *req) { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - req->in.h.unique = fc->reqctr; + req->in.h.unique = fuse_get_unique(fc); req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); list_add_tail(&req->list, &fc->pending); @@ -302,9 +350,6 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) kill_fasync(&fc->fasync, SIGIO, POLL_IN); } -/* - * This can only be interrupted by a SIGKILL - */ void request_send(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; @@ -327,8 +372,12 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req) static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) { spin_lock(&fc->lock); - background_request(fc, req); if (fc->connected) { + req->background = 1; + fc->num_background++; + if (fc->num_background == FUSE_MAX_BACKGROUND) + fc->blocked = 1; + queue_request(fc, req); spin_unlock(&fc->lock); } else { @@ -352,14 +401,14 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already - * interrupted bail out. + * aborted bail out. */ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) { int err = 0; if (req) { spin_lock(&fc->lock); - if (req->interrupted) + if (req->aborted) err = -ENOENT; else req->locked = 1; @@ -369,7 +418,7 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) } /* - * Unlock request. If it was interrupted during being locked, the + * Unlock request. If it was aborted during being locked, the * requester thread is currently waiting for it to be unlocked, so * wake it up. */ @@ -378,7 +427,7 @@ static void unlock_request(struct fuse_conn *fc, struct fuse_req *req) if (req) { spin_lock(&fc->lock); req->locked = 0; - if (req->interrupted) + if (req->aborted) wake_up(&req->waitq); spin_unlock(&fc->lock); } @@ -557,13 +606,18 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } +static int request_pending(struct fuse_conn *fc) +{ + return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); +} + /* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&fc->waitq, &wait); - while (fc->connected && list_empty(&fc->pending)) { + while (fc->connected && !request_pending(fc)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) break; @@ -577,11 +631,50 @@ static void request_wait(struct fuse_conn *fc) } /* + * Transfer an interrupt request to userspace + * + * Unlike other requests this is assembled on demand, without a need + * to allocate a separate fuse_req structure. + * + * Called with fc->lock held, releases it + */ +static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, + const struct iovec *iov, unsigned long nr_segs) +{ + struct fuse_copy_state cs; + struct fuse_in_header ih; + struct fuse_interrupt_in arg; + unsigned reqsize = sizeof(ih) + sizeof(arg); + int err; + + list_del_init(&req->intr_entry); + req->intr_unique = fuse_get_unique(fc); + memset(&ih, 0, sizeof(ih)); + memset(&arg, 0, sizeof(arg)); + ih.len = reqsize; + ih.opcode = FUSE_INTERRUPT; + ih.unique = req->intr_unique; + arg.unique = req->in.h.unique; + + spin_unlock(&fc->lock); + if (iov_length(iov, nr_segs) < reqsize) + return -EINVAL; + + fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs); + err = fuse_copy_one(&cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(&cs, &arg, sizeof(arg)); + fuse_copy_finish(&cs); + + return err ? err : reqsize; +} + +/* * Read a single request into the userspace filesystem's buffer. This * function waits until a request is available, then removes it from * the pending list and copies request data to userspace buffer. If - * no reply is needed (FORGET) or request has been interrupted or - * there was an error during the copying then it's finished by calling + * no reply is needed (FORGET) or request has been aborted or there + * was an error during the copying then it's finished by calling * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ @@ -601,7 +694,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, spin_lock(&fc->lock); err = -EAGAIN; if ((file->f_flags & O_NONBLOCK) && fc->connected && - list_empty(&fc->pending)) + !request_pending(fc)) goto err_unlock; request_wait(fc); @@ -609,9 +702,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, if (!fc->connected) goto err_unlock; err = -ERESTARTSYS; - if (list_empty(&fc->pending)) + if (!request_pending(fc)) goto err_unlock; + if (!list_empty(&fc->interrupts)) { + req = list_entry(fc->interrupts.next, struct fuse_req, + intr_entry); + return fuse_read_interrupt(fc, req, iov, nr_segs); + } + req = list_entry(fc->pending.next, struct fuse_req, list); req->state = FUSE_REQ_READING; list_move(&req->list, &fc->io); @@ -636,10 +735,10 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, fuse_copy_finish(&cs); spin_lock(&fc->lock); req->locked = 0; - if (!err && req->interrupted) + if (!err && req->aborted) err = -ENOENT; if (err) { - if (!req->interrupted) + if (!req->aborted) req->out.h.error = -EIO; request_end(fc, req); return err; @@ -649,6 +748,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, else { req->state = FUSE_REQ_SENT; list_move_tail(&req->list, &fc->processing); + if (req->interrupted) + queue_interrupt(fc, req); spin_unlock(&fc->lock); } return reqsize; @@ -675,7 +776,7 @@ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) list_for_each(entry, &fc->processing) { struct fuse_req *req; req = list_entry(entry, struct fuse_req, list); - if (req->in.h.unique == unique) + if (req->in.h.unique == unique || req->intr_unique == unique) return req; } return NULL; @@ -741,17 +842,33 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, goto err_unlock; req = request_find(fc, oh.unique); - err = -EINVAL; if (!req) goto err_unlock; - if (req->interrupted) { + if (req->aborted) { spin_unlock(&fc->lock); fuse_copy_finish(&cs); spin_lock(&fc->lock); request_end(fc, req); return -ENOENT; } + /* Is it an interrupt reply? */ + if (req->intr_unique == oh.unique) { + err = -EINVAL; + if (nbytes != sizeof(struct fuse_out_header)) + goto err_unlock; + + if (oh.error == -ENOSYS) + fc->no_interrupt = 1; + else if (oh.error == -EAGAIN) + queue_interrupt(fc, req); + + spin_unlock(&fc->lock); + fuse_copy_finish(&cs); + return nbytes; + } + + req->state = FUSE_REQ_WRITING; list_move(&req->list, &fc->io); req->out.h = oh; req->locked = 1; @@ -764,9 +881,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, spin_lock(&fc->lock); req->locked = 0; if (!err) { - if (req->interrupted) + if (req->aborted) err = -ENOENT; - } else if (!req->interrupted) + } else if (!req->aborted) req->out.h.error = -EIO; request_end(fc, req); @@ -800,7 +917,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) spin_lock(&fc->lock); if (!fc->connected) mask = POLLERR; - else if (!list_empty(&fc->pending)) + else if (request_pending(fc)) mask |= POLLIN | POLLRDNORM; spin_unlock(&fc->lock); @@ -826,7 +943,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) /* * Abort requests under I/O * - * The requests are set to interrupted and finished, and the request + * The requests are set to aborted and finished, and the request * waiter is woken up. This will make request_wait_answer() wait * until the request is unlocked and then return. * @@ -841,7 +958,7 @@ static void end_io_requests(struct fuse_conn *fc) list_entry(fc->io.next, struct fuse_req, list); void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - req->interrupted = 1; + req->aborted = 1; req->out.h.error = -ECONNABORTED; req->state = FUSE_REQ_FINISHED; list_del_init(&req->list); @@ -874,19 +991,20 @@ static void end_io_requests(struct fuse_conn *fc) * onto the pending list is prevented by req->connected being false. * * Progression of requests under I/O to the processing list is - * prevented by the req->interrupted flag being true for these - * requests. For this reason requests on the io list must be aborted - * first. + * prevented by the req->aborted flag being true for these requests. + * For this reason requests on the io list must be aborted first. */ void fuse_abort_conn(struct fuse_conn *fc) { spin_lock(&fc->lock); if (fc->connected) { fc->connected = 0; + fc->blocked = 0; end_io_requests(fc); end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); wake_up_all(&fc->waitq); + wake_up_all(&fc->blocked_waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); } spin_unlock(&fc->lock); @@ -902,7 +1020,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) end_requests(fc, &fc->processing); spin_unlock(&fc->lock); fasync_helper(-1, file, 0, &fc->fasync); - kobject_put(&fc->kobj); + fuse_conn_put(fc); } return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8d7546e832e8..72a74cde6de8 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -79,7 +79,6 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, { req->in.h.opcode = FUSE_LOOKUP; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; @@ -225,6 +224,20 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } /* + * Synchronous release for the case when something goes wrong in CREATE_OPEN + */ +static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, + u64 nodeid, int flags) +{ + struct fuse_req *req; + + req = fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); + req->force = 1; + request_send(fc, req); + fuse_put_request(fc, req); +} + +/* * Atomic create+open operation * * If the filesystem doesn't support this, then fall back to separate @@ -237,6 +250,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct inode *inode; struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; + struct fuse_req *forget_req; struct fuse_open_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; @@ -247,9 +261,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (fc->no_create) return -ENOSYS; + forget_req = fuse_get_req(fc); + if (IS_ERR(forget_req)) + return PTR_ERR(forget_req); + req = fuse_get_req(fc); + err = PTR_ERR(req); if (IS_ERR(req)) - return PTR_ERR(req); + goto out_put_forget_req; err = -ENOMEM; ff = fuse_file_alloc(); @@ -262,7 +281,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, inarg.mode = mode; req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -285,25 +303,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) goto out_free_ff; + fuse_put_request(fc, req); inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr); - err = -ENOMEM; if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); ff->fh = outopen.fh; - /* Special release, with inode = NULL, this will - trigger a 'forget' request when the release is - complete */ - fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0); - goto out_put_request; + fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_send_forget(fc, forget_req, outentry.nodeid, 1); + return -ENOMEM; } - fuse_put_request(fc, req); + fuse_put_request(fc, forget_req); d_instantiate(entry, inode); fuse_change_timeout(entry, &outentry); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; - fuse_send_release(fc, ff, outentry.nodeid, inode, flags, 0); + fuse_sync_release(fc, ff, outentry.nodeid, flags); return PTR_ERR(file); } fuse_finish_open(inode, file, ff, &outopen); @@ -313,6 +329,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_file_free(ff); out_put_request: fuse_put_request(fc, req); + out_put_forget_req: + fuse_put_request(fc, forget_req); return err; } @@ -328,7 +346,6 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, int err; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->out.numargs = 1; req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; @@ -448,7 +465,6 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) req->in.h.opcode = FUSE_UNLINK; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; @@ -480,7 +496,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; @@ -510,8 +525,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, inarg.newdir = get_node_id(newdir); req->in.h.opcode = FUSE_RENAME; req->in.h.nodeid = get_node_id(olddir); - req->inode = olddir; - req->inode2 = newdir; req->in.numargs = 3; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -558,7 +571,6 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); req->in.h.opcode = FUSE_LINK; - req->inode2 = inode; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -587,7 +599,6 @@ int fuse_do_getattr(struct inode *inode) req->in.h.opcode = FUSE_GETATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->out.numargs = 1; req->out.args[0].size = sizeof(arg); req->out.args[0].value = &arg; @@ -679,7 +690,6 @@ static int fuse_access(struct inode *inode, int mask) inarg.mask = mask; req->in.h.opcode = FUSE_ACCESS; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -820,7 +830,6 @@ static char *read_link(struct dentry *dentry) } req->in.h.opcode = FUSE_READLINK; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->out.argvar = 1; req->out.numargs = 1; req->out.args[0].size = PAGE_SIZE - 1; @@ -939,7 +948,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) iattr_to_fattr(attr, &inarg); req->in.h.opcode = FUSE_SETATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1002,7 +1010,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name, inarg.flags = flags; req->in.h.opcode = FUSE_SETXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 3; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1041,7 +1048,6 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, inarg.size = size; req->in.h.opcode = FUSE_GETXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1091,7 +1097,6 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) inarg.size = size; req->in.h.opcode = FUSE_LISTXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1135,7 +1140,6 @@ static int fuse_removexattr(struct dentry *entry, const char *name) req->in.h.opcode = FUSE_REMOVEXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = strlen(name) + 1; req->in.args[0].value = name; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fc342cf7c2cc..28aa81eae2cc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -30,7 +30,6 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -49,8 +48,8 @@ struct fuse_file *fuse_file_alloc(void) struct fuse_file *ff; ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); if (ff) { - ff->release_req = fuse_request_alloc(); - if (!ff->release_req) { + ff->reserved_req = fuse_request_alloc(); + if (!ff->reserved_req) { kfree(ff); ff = NULL; } @@ -60,7 +59,7 @@ struct fuse_file *fuse_file_alloc(void) void fuse_file_free(struct fuse_file *ff) { - fuse_request_free(ff->release_req); + fuse_request_free(ff->reserved_req); kfree(ff); } @@ -113,37 +112,22 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) return err; } -/* Special case for failed iget in CREATE */ -static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) +struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, + int opcode) { - /* If called from end_io_requests(), req has more than one - reference and fuse_reset_request() cannot work */ - if (fc->connected) { - u64 nodeid = req->in.h.nodeid; - fuse_reset_request(req); - fuse_send_forget(fc, req, nodeid, 1); - } else - fuse_put_request(fc, req); -} - -void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, struct inode *inode, int flags, int isdir) -{ - struct fuse_req * req = ff->release_req; + struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release_in; inarg->fh = ff->fh; inarg->flags = flags; - req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; + req->in.h.opcode = opcode; req->in.h.nodeid = nodeid; - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_release_in); req->in.args[0].value = inarg; - request_send_background(fc, req); - if (!inode) - req->end = fuse_release_end; kfree(ff); + + return req; } int fuse_release_common(struct inode *inode, struct file *file, int isdir) @@ -151,8 +135,15 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) struct fuse_file *ff = file->private_data; if (ff) { struct fuse_conn *fc = get_fuse_conn(inode); - u64 nodeid = get_node_id(inode); - fuse_send_release(fc, ff, nodeid, inode, file->f_flags, isdir); + struct fuse_req *req; + + req = fuse_release_fill(ff, get_node_id(inode), file->f_flags, + isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); + + /* Hold vfsmount and dentry until release is finished */ + req->vfsmount = mntget(file->f_vfsmnt); + req->dentry = dget(file->f_dentry); + request_send_background(fc, req); } /* Return value is ignored by VFS */ @@ -169,7 +160,29 @@ static int fuse_release(struct inode *inode, struct file *file) return fuse_release_common(inode, file, 0); } -static int fuse_flush(struct file *file) +/* + * Scramble the ID space with XTEA, so that the value of the files_struct + * pointer is not exposed to userspace. + */ +static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) +{ + u32 *k = fc->scramble_key; + u64 v = (unsigned long) id; + u32 v0 = v; + u32 v1 = v >> 32; + u32 sum = 0; + int i; + + for (i = 0; i < 32; i++) { + v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]); + sum += 0x9E3779B9; + v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]); + } + + return (u64) v0 + ((u64) v1 << 32); +} + +static int fuse_flush(struct file *file, fl_owner_t id) { struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); @@ -184,19 +197,16 @@ static int fuse_flush(struct file *file) if (fc->no_flush) return 0; - req = fuse_get_req(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - + req = fuse_get_req_nofail(fc, file); memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; + inarg.lock_owner = fuse_lock_owner_id(fc, id); req->in.h.opcode = FUSE_FLUSH; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; + req->force = 1; request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); @@ -232,8 +242,6 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, inarg.fsync_flags = datasync ? 1 : 0; req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -266,8 +274,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, inarg->size = count; req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_read_in); req->in.args[0].value = inarg; @@ -342,6 +348,8 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file, req->out.page_zeroing = 1; fuse_read_fill(req, file, inode, pos, count, FUSE_READ); if (fc->async_read) { + get_file(file); + req->file = file; req->end = fuse_readpages_end; request_send_background(fc, req); } else { @@ -420,8 +428,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, inarg.size = count; req->in.h.opcode = FUSE_WRITE; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.argpages = 1; req->in.numargs = 2; req->in.args[0].size = sizeof(struct fuse_write_in); @@ -619,6 +625,126 @@ static int fuse_set_page_dirty(struct page *page) return 0; } +static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, + struct file_lock *fl) +{ + switch (ffl->type) { + case F_UNLCK: + break; + + case F_RDLCK: + case F_WRLCK: + if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX || + ffl->end < ffl->start) + return -EIO; + + fl->fl_start = ffl->start; + fl->fl_end = ffl->end; + fl->fl_pid = ffl->pid; + break; + + default: + return -EIO; + } + fl->fl_type = ffl->type; + return 0; +} + +static void fuse_lk_fill(struct fuse_req *req, struct file *file, + const struct file_lock *fl, int opcode, pid_t pid) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_lk_in *arg = &req->misc.lk_in; + + arg->fh = ff->fh; + arg->owner = fuse_lock_owner_id(fc, fl->fl_owner); + arg->lk.start = fl->fl_start; + arg->lk.end = fl->fl_end; + arg->lk.type = fl->fl_type; + arg->lk.pid = pid; + req->in.h.opcode = opcode; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(*arg); + req->in.args[0].value = arg; +} + +static int fuse_getlk(struct file *file, struct file_lock *fl) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_lk_out outarg; + int err; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + fuse_lk_fill(req, file, fl, FUSE_GETLK, 0); + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) + err = convert_fuse_file_lock(&outarg.lk, fl); + + return err; +} + +static int fuse_setlk(struct file *file, struct file_lock *fl) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; + pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; + int err; + + /* Unlock on close is handled by the flush method */ + if (fl->fl_flags & FL_CLOSE) + return 0; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + fuse_lk_fill(req, file, fl, opcode, pid); + request_send(fc, req); + err = req->out.h.error; + /* locking is restartable */ + if (err == -EINTR) + err = -ERESTARTSYS; + fuse_put_request(fc, req); + return err; +} + +static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + int err; + + if (cmd == F_GETLK) { + if (fc->no_lock) { + if (!posix_test_lock(file, fl, fl)) + fl->fl_type = F_UNLCK; + err = 0; + } else + err = fuse_getlk(file, fl); + } else { + if (fc->no_lock) + err = posix_lock_file_wait(file, fl); + else + err = fuse_setlk(file, fl); + } + return err; +} + static const struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, @@ -628,6 +754,7 @@ static const struct file_operations fuse_file_operations = { .flush = fuse_flush, .release = fuse_release, .fsync = fuse_fsync, + .lock = fuse_file_lock, .sendfile = generic_file_sendfile, }; @@ -639,6 +766,7 @@ static const struct file_operations fuse_direct_io_file_operations = { .flush = fuse_flush, .release = fuse_release, .fsync = fuse_fsync, + .lock = fuse_file_lock, /* no mmap and sendfile */ }; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0474202cb5dc..0dbf96621841 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -8,12 +8,13 @@ #include <linux/fuse.h> #include <linux/fs.h> +#include <linux/mount.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/backing-dev.h> -#include <asm/semaphore.h> +#include <linux/mutex.h> /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -24,6 +25,9 @@ /** It could be as large as PATH_MAX, but would that have any uses? */ #define FUSE_NAME_MAX 1024 +/** Number of dentries for each connection in the control filesystem */ +#define FUSE_CTL_NUM_DENTRIES 3 + /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. Otherwise no permission checking is done in the kernel */ @@ -33,6 +37,11 @@ doing the mount will be allowed to access the filesystem */ #define FUSE_ALLOW_OTHER (1 << 1) +/** List of active connections */ +extern struct list_head fuse_conn_list; + +/** Global mutex protecting fuse_conn_list and the control filesystem */ +extern struct mutex fuse_mutex; /** FUSE inode */ struct fuse_inode { @@ -56,7 +65,7 @@ struct fuse_inode { /** FUSE specific file data */ struct fuse_file { /** Request reserved for flush and release */ - struct fuse_req *release_req; + struct fuse_req *reserved_req; /** File handle used by userspace */ u64 fh; @@ -122,6 +131,7 @@ enum fuse_req_state { FUSE_REQ_PENDING, FUSE_REQ_READING, FUSE_REQ_SENT, + FUSE_REQ_WRITING, FUSE_REQ_FINISHED }; @@ -135,12 +145,15 @@ struct fuse_req { fuse_conn */ struct list_head list; - /** Entry on the background list */ - struct list_head bg_entry; + /** Entry on the interrupts list */ + struct list_head intr_entry; /** refcount */ atomic_t count; + /** Unique ID for the interrupt request */ + u64 intr_unique; + /* * The following bitfields are either set once before the * request is queued or setting/clearing them is protected by @@ -150,12 +163,18 @@ struct fuse_req { /** True if the request has reply */ unsigned isreply:1; - /** The request was interrupted */ - unsigned interrupted:1; + /** Force sending of the request even if interrupted */ + unsigned force:1; + + /** The request was aborted */ + unsigned aborted:1; /** Request is sent in the background */ unsigned background:1; + /** The request has been interrupted */ + unsigned interrupted:1; + /** Data is being copied to/from the request */ unsigned locked:1; @@ -181,6 +200,7 @@ struct fuse_req { struct fuse_init_in init_in; struct fuse_init_out init_out; struct fuse_read_in read_in; + struct fuse_lk_in lk_in; } misc; /** page vector */ @@ -192,17 +212,20 @@ struct fuse_req { /** offset of data on first page */ unsigned page_offset; - /** Inode used in the request */ - struct inode *inode; - - /** Second inode used in the request (or NULL) */ - struct inode *inode2; - /** File used in the request (or NULL) */ struct file *file; + /** vfsmount used in release */ + struct vfsmount *vfsmount; + + /** dentry used in release */ + struct dentry *dentry; + /** Request completion callback */ void (*end)(struct fuse_conn *, struct fuse_req *); + + /** Request is stolen from fuse_file->reserved_req */ + struct file *stolen_file; }; /** @@ -216,6 +239,9 @@ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; + /** Refcount */ + atomic_t count; + /** The user id for this mount */ uid_t user_id; @@ -243,13 +269,12 @@ struct fuse_conn { /** The list of requests under I/O */ struct list_head io; - /** Requests put in the background (RELEASE or any other - interrupted request) */ - struct list_head background; - /** Number of requests currently in the background */ unsigned num_background; + /** Pending interrupts */ + struct list_head interrupts; + /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ @@ -258,15 +283,9 @@ struct fuse_conn { /** waitq for blocked connection */ wait_queue_head_t blocked_waitq; - /** RW semaphore for exclusion with fuse_put_super() */ - struct rw_semaphore sbput_sem; - /** The next unique request id */ u64 reqctr; - /** Mount is active */ - unsigned mounted; - /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -305,12 +324,18 @@ struct fuse_conn { /** Is removexattr not implemented by fs? */ unsigned no_removexattr : 1; + /** Are file locking primitives not implemented by fs? */ + unsigned no_lock : 1; + /** Is access not implemented by fs? */ unsigned no_access : 1; /** Is create not implemented by fs? */ unsigned no_create : 1; + /** Is interrupt not implemented by fs? */ + unsigned no_interrupt : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -320,11 +345,23 @@ struct fuse_conn { /** Backing dev info */ struct backing_dev_info bdi; - /** kobject */ - struct kobject kobj; + /** Entry on the fuse_conn_list */ + struct list_head entry; + + /** Unique ID */ + u64 id; + + /** Dentries in the control filesystem */ + struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; + + /** number of dentries used in the above array */ + int ctl_ndents; /** O_ASYNC requests */ struct fasync_struct *fasync; + + /** Key for lock owner ID scrambling */ + u32 scramble_key[4]; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -337,11 +374,6 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode) return get_fuse_conn_super(inode->i_sb); } -static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj) -{ - return container_of(obj, struct fuse_conn, kobj); -} - static inline struct fuse_inode *get_fuse_inode(struct inode *inode) { return container_of(inode, struct fuse_inode, inode); @@ -383,12 +415,9 @@ void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file, struct fuse_file *ff, struct fuse_open_out *outarg); -/** - * Send a RELEASE request - */ -void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, struct inode *inode, int flags, int isdir); - +/** */ +struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, + int opcode); /** * Send RELEASE or RELEASEDIR request */ @@ -435,6 +464,9 @@ int fuse_dev_init(void); */ void fuse_dev_cleanup(void); +int fuse_ctl_init(void); +void fuse_ctl_cleanup(void); + /** * Allocate a request */ @@ -446,14 +478,14 @@ struct fuse_req *fuse_request_alloc(void); void fuse_request_free(struct fuse_req *req); /** - * Reinitialize a request, the preallocated flag is left unmodified + * Get a request, may fail with -ENOMEM */ -void fuse_reset_request(struct fuse_req *req); +struct fuse_req *fuse_get_req(struct fuse_conn *fc); /** - * Reserve a preallocated request + * Gets a requests for a file operation, always succeeds */ -struct fuse_req *fuse_get_req(struct fuse_conn *fc); +struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); /** * Decrement reference count of a request. If count goes to zero free @@ -476,11 +508,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); */ void request_send_background(struct fuse_conn *fc, struct fuse_req *req); -/** - * Release inodes and file associated with background request - */ -void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req); - /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); @@ -493,3 +520,23 @@ int fuse_do_getattr(struct inode *inode); * Invalidate inode attributes */ void fuse_invalidate_attr(struct inode *inode); + +/** + * Acquire reference to fuse_conn + */ +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); + +/** + * Release reference to fuse_conn + */ +void fuse_conn_put(struct fuse_conn *fc); + +/** + * Add connection to control filesystem + */ +int fuse_ctl_add_conn(struct fuse_conn *fc); + +/** + * Remove connection from control filesystem + */ +void fuse_ctl_remove_conn(struct fuse_conn *fc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7627022446b2..dcaaabd3b9c4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -11,25 +11,20 @@ #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> -#include <linux/mount.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/module.h> #include <linux/parser.h> #include <linux/statfs.h> +#include <linux/random.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); static kmem_cache_t *fuse_inode_cachep; -static struct subsystem connections_subsys; - -struct fuse_conn_attr { - struct attribute attr; - ssize_t (*show)(struct fuse_conn *, char *); - ssize_t (*store)(struct fuse_conn *, const char *, size_t); -}; +struct list_head fuse_conn_list; +DEFINE_MUTEX(fuse_mutex); #define FUSE_SUPER_MAGIC 0x65735546 @@ -104,6 +99,14 @@ static void fuse_clear_inode(struct inode *inode) } } +static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) +{ + if (*flags & MS_MANDLOCK) + return -EINVAL; + + return 0; +} + void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) { if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) @@ -195,31 +198,29 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, return inode; } -static void fuse_umount_begin(struct super_block *sb) +static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags) { - fuse_abort_conn(get_fuse_conn_super(sb)); + if (flags & MNT_FORCE) + fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb)); } static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); - down_write(&fc->sbput_sem); - while (!list_empty(&fc->background)) - fuse_release_background(fc, - list_entry(fc->background.next, - struct fuse_req, bg_entry)); - spin_lock(&fc->lock); - fc->mounted = 0; fc->connected = 0; + fc->blocked = 0; spin_unlock(&fc->lock); - up_write(&fc->sbput_sem); /* Flush all readers on this fs */ kill_fasync(&fc->fasync, SIGIO, POLL_IN); wake_up_all(&fc->waitq); - kobject_del(&fc->kobj); - kobject_put(&fc->kobj); + wake_up_all(&fc->blocked_waitq); + mutex_lock(&fuse_mutex); + list_del(&fc->entry); + fuse_ctl_remove_conn(fc); + mutex_unlock(&fuse_mutex); + fuse_conn_put(fc); } static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) @@ -236,8 +237,9 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr /* fsid is left zero */ } -static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) +static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; struct fuse_statfs_out outarg; @@ -368,11 +370,6 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -static void fuse_conn_release(struct kobject *kobj) -{ - kfree(get_fuse_conn_kobj(kobj)); -} - static struct fuse_conn *new_conn(void) { struct fuse_conn *fc; @@ -380,24 +377,35 @@ static struct fuse_conn *new_conn(void) fc = kzalloc(sizeof(*fc), GFP_KERNEL); if (fc) { spin_lock_init(&fc->lock); + atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); INIT_LIST_HEAD(&fc->pending); INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); - INIT_LIST_HEAD(&fc->background); - init_rwsem(&fc->sbput_sem); - kobj_set_kset_s(fc, connections_subsys); - kobject_init(&fc->kobj); + INIT_LIST_HEAD(&fc->interrupts); atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; fc->reqctr = 0; fc->blocked = 1; + get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } return fc; } +void fuse_conn_put(struct fuse_conn *fc) +{ + if (atomic_dec_and_test(&fc->count)) + kfree(fc); +} + +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) +{ + atomic_inc(&fc->count); + return fc; +} + static struct inode *get_root_inode(struct super_block *sb, unsigned mode) { struct fuse_attr attr; @@ -413,6 +421,7 @@ static struct super_operations fuse_super_operations = { .destroy_inode = fuse_destroy_inode, .read_inode = fuse_read_inode, .clear_inode = fuse_clear_inode, + .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, @@ -432,8 +441,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) fc->async_read = 1; - } else + if (!(arg->flags & FUSE_POSIX_LOCKS)) + fc->no_lock = 1; + } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; + fc->no_lock = 1; + } fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; @@ -451,7 +464,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; - arg->flags |= FUSE_ASYNC_READ; + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -467,10 +480,9 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) request_send_background(fc, req); } -static unsigned long long conn_id(void) +static u64 conn_id(void) { - /* BKL is held for ->get_sb() */ - static unsigned long long ctr = 1; + static u64 ctr = 1; return ctr++; } @@ -484,6 +496,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) struct fuse_req *init_req; int err; + if (sb->s_flags & MS_MANDLOCK) + return -EINVAL; + if (!parse_fuse_opt((char *) data, &d)) return -EINVAL; @@ -527,25 +542,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!init_req) goto err_put_root; - err = kobject_set_name(&fc->kobj, "%llu", conn_id()); - if (err) - goto err_free_req; - - err = kobject_add(&fc->kobj); - if (err) - goto err_free_req; - - /* Setting file->private_data can't race with other mount() - instances, since BKL is held for ->get_sb() */ + mutex_lock(&fuse_mutex); err = -EINVAL; if (file->private_data) - goto err_kobject_del; + goto err_unlock; + fc->id = conn_id(); + err = fuse_ctl_add_conn(fc); + if (err) + goto err_unlock; + + list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - fc->mounted = 1; fc->connected = 1; - kobject_get(&fc->kobj); - file->private_data = fc; + file->private_data = fuse_conn_get(fc); + mutex_unlock(&fuse_mutex); /* * atomic_dec_and_test() in fput() provides the necessary * memory barrier for file->private_data to be visible on all @@ -557,23 +568,22 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return 0; - err_kobject_del: - kobject_del(&fc->kobj); - err_free_req: + err_unlock: + mutex_unlock(&fuse_mutex); fuse_request_free(init_req); err_put_root: dput(root_dentry); err: fput(file); - kobject_put(&fc->kobj); + fuse_conn_put(fc); return err; } -static struct super_block *fuse_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *raw_data) +static int fuse_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super); + return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } static struct file_system_type fuse_fs_type = { @@ -583,68 +593,8 @@ static struct file_system_type fuse_fs_type = { .kill_sb = kill_anon_super, }; -static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page) -{ - return sprintf(page, "%i\n", atomic_read(&fc->num_waiting)); -} - -static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page, - size_t count) -{ - fuse_abort_conn(fc); - return count; -} - -static struct fuse_conn_attr fuse_conn_waiting = - __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL); -static struct fuse_conn_attr fuse_conn_abort = - __ATTR(abort, 0600, NULL, fuse_conn_abort_store); - -static struct attribute *fuse_conn_attrs[] = { - &fuse_conn_waiting.attr, - &fuse_conn_abort.attr, - NULL, -}; - -static ssize_t fuse_conn_attr_show(struct kobject *kobj, - struct attribute *attr, - char *page) -{ - struct fuse_conn_attr *fca = - container_of(attr, struct fuse_conn_attr, attr); - - if (fca->show) - return fca->show(get_fuse_conn_kobj(kobj), page); - else - return -EACCES; -} - -static ssize_t fuse_conn_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *page, size_t count) -{ - struct fuse_conn_attr *fca = - container_of(attr, struct fuse_conn_attr, attr); - - if (fca->store) - return fca->store(get_fuse_conn_kobj(kobj), page, count); - else - return -EACCES; -} - -static struct sysfs_ops fuse_conn_sysfs_ops = { - .show = &fuse_conn_attr_show, - .store = &fuse_conn_attr_store, -}; - -static struct kobj_type ktype_fuse_conn = { - .release = fuse_conn_release, - .sysfs_ops = &fuse_conn_sysfs_ops, - .default_attrs = fuse_conn_attrs, -}; - static decl_subsys(fuse, NULL, NULL); -static decl_subsys(connections, &ktype_fuse_conn, NULL); +static decl_subsys(connections, NULL, NULL); static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) @@ -718,6 +668,7 @@ static int __init fuse_init(void) printk("fuse init (API version %i.%i)\n", FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); + INIT_LIST_HEAD(&fuse_conn_list); res = fuse_fs_init(); if (res) goto err; @@ -730,8 +681,14 @@ static int __init fuse_init(void) if (res) goto err_dev_cleanup; + res = fuse_ctl_init(); + if (res) + goto err_sysfs_cleanup; + return 0; + err_sysfs_cleanup: + fuse_sysfs_cleanup(); err_dev_cleanup: fuse_dev_cleanup(); err_fs_cleanup: @@ -744,6 +701,7 @@ static void __exit fuse_exit(void) { printk(KERN_DEBUG "fuse exit\n"); + fuse_ctl_cleanup(); fuse_sysfs_cleanup(); fuse_fs_cleanup(); fuse_dev_cleanup(); diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 1e44dcfe49c4..13231dd5ce66 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -280,7 +280,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) block = off >> PAGE_CACHE_SHIFT; node->page_offset = off & ~PAGE_CACHE_MASK; for (i = 0; i < tree->pages_per_bnode; i++) { - page = read_cache_page(mapping, block++, (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, block++, NULL); if (IS_ERR(page)) goto fail; if (PageError(page)) { diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index d20131ce4b95..400357994319 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -59,7 +59,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke unlock_new_inode(tree->inode); mapping = tree->inode->i_mapping; - page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) goto free_tree; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 1181d116117d..d9227bf14e86 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -80,8 +80,10 @@ static void hfs_put_super(struct super_block *sb) * * changed f_files/f_ffree to reflect the fs_ablock/free_ablocks. */ -static int hfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; + buf->f_type = HFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = (u32)HFS_SB(sb)->fs_ablocks * HFS_SB(sb)->fs_div; @@ -413,10 +415,11 @@ bail: return res; } -static struct super_block *hfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); } static struct file_system_type hfs_fs_type = { diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 9fb51632303c..d128a25b74d2 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -31,8 +31,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; - page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); pptr = kmap(page); curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; i = offset % 32; @@ -72,8 +71,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma offset += PAGE_CACHE_BITS; if (offset >= size) break; - page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, + NULL); curr = pptr = kmap(page); if ((size ^ offset) / PAGE_CACHE_BITS) end = pptr + PAGE_CACHE_BITS / 32; @@ -119,8 +118,8 @@ found: set_page_dirty(page); kunmap(page); offset += PAGE_CACHE_BITS; - page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, + NULL); pptr = kmap(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; @@ -167,7 +166,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; pnr = offset / PAGE_CACHE_BITS; - page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, pnr, NULL); pptr = kmap(page); curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; end = pptr + PAGE_CACHE_BITS / 32; @@ -199,7 +198,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) break; set_page_dirty(page); kunmap(page); - page = read_cache_page(mapping, ++pnr, (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, ++pnr, NULL); pptr = kmap(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 746abc9ecf70..77bf434da679 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -440,7 +440,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) block = off >> PAGE_CACHE_SHIFT; node->page_offset = off & ~PAGE_CACHE_MASK; for (i = 0; i < tree->pages_per_bnode; block++, i++) { - page = read_cache_page(mapping, block, (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, block, NULL); if (IS_ERR(page)) goto fail; if (PageError(page)) { diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index effa8991999c..cfc852fdd1b5 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -38,7 +38,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) goto free_tree; mapping = tree->inode->i_mapping; - page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) goto free_tree; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 7843f792a4b7..0a92fa2336a2 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -212,8 +212,10 @@ static void hfsplus_put_super(struct super_block *sb) sb->s_fs_info = NULL; } -static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf) +static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; + buf->f_type = HFSPLUS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; @@ -450,10 +452,12 @@ static void hfsplus_destroy_inode(struct inode *inode) #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) -static struct super_block *hfsplus_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hfsplus_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, + mnt); } static struct file_system_type hfsplus_fs_type = { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index bf0f8e16e433..8e0d37743e7c 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -239,7 +239,7 @@ static int read_inode(struct inode *ino) return(err); } -int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) +int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) { /* do_statfs uses struct statfs64 internally, but the linux kernel * struct statfs still has 32-bit versions for most of these fields, @@ -252,7 +252,7 @@ int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) long long f_files; long long f_ffree; - err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename, + err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename, &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), &sf->f_namelen, sf->f_spare); @@ -993,11 +993,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) return(err); } -static struct super_block *hostfs_read_sb(struct file_system_type *type, - int flags, const char *dev_name, - void *data) +static int hostfs_read_sb(struct file_system_type *type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); + return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); } static struct file_system_type hostfs_type = { diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index d72d8c87c996..f798480a363f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -135,8 +135,9 @@ static unsigned count_bitmaps(struct super_block *s) return count; } -static int hpfs_statfs(struct super_block *s, struct kstatfs *buf) +static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *s = dentry->d_sb; struct hpfs_sb_info *sbi = hpfs_sb(s); lock_kernel(); @@ -662,10 +663,11 @@ bail0: return -EINVAL; } -static struct super_block *hpfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hpfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, + mnt); } static struct file_system_type hpfs_fs_type = { diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 5e6363be246f..3a9bdf58166f 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -616,7 +616,7 @@ static const struct file_operations hppfs_dir_fops = { .fsync = hppfs_fsync, }; -static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf) +static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) { sf->f_blocks = 0; sf->f_bfree = 0; @@ -769,11 +769,11 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) return(err); } -static struct super_block *hppfs_read_super(struct file_system_type *type, - int flags, const char *dev_name, - void *data) +static int hppfs_read_super(struct file_system_type *type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return(get_sb_nodev(type, flags, data, hppfs_fill_super)); + return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); } static struct file_system_type hppfs_type = { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3a5b4e923455..e6410d8edd0e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -59,7 +59,6 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_dentry->d_inode; - struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); loff_t len, vma_len; int ret; @@ -87,9 +86,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) goto out; - if (vma->vm_flags & VM_MAYSHARE) - if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0) - goto out; + if (vma->vm_flags & VM_MAYSHARE && + hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), + len >> HPAGE_SHIFT)) + goto out; ret = 0; hugetlb_prefault_arch_hook(vma->vm_mm); @@ -195,12 +195,8 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) const pgoff_t start = lstart >> HPAGE_SHIFT; struct pagevec pvec; pgoff_t next; - int i; + int i, freed = 0; - hugetlb_truncate_reservation(HUGETLBFS_I(inode), - lstart >> HPAGE_SHIFT); - if (!mapping->nrpages) - return; pagevec_init(&pvec, 0); next = start; while (1) { @@ -221,10 +217,12 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) truncate_huge_page(page); unlock_page(page); hugetlb_put_quota(mapping); + freed++; } huge_pagevec_release(&pvec); } BUG_ON(!lstart && mapping->nrpages); + hugetlb_unreserve_pages(inode, start, freed); } static void hugetlbfs_delete_inode(struct inode *inode) @@ -366,6 +364,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + INIT_LIST_HEAD(&inode->i_mapping->private_list); info = HUGETLBFS_I(inode); mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); switch (mode & S_IFMT) { @@ -467,9 +466,9 @@ static int hugetlbfs_set_page_dirty(struct page *page) return 0; } -static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); + struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); buf->f_type = HUGETLBFS_MAGIC; buf->f_bsize = HPAGE_SIZE; @@ -538,7 +537,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } - p->prereserved_hpages = 0; return &p->vfs_inode; } @@ -723,10 +721,10 @@ void hugetlb_put_quota(struct address_space *mapping) } } -static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hugetlbfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super); + return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); } static struct file_system_type hugetlbfs_fs_type = { @@ -781,8 +779,7 @@ struct file *hugetlb_zero_setup(size_t size) goto out_file; error = -ENOMEM; - if (hugetlb_extend_reservation(HUGETLBFS_I(inode), - size >> HPAGE_SHIFT) != 0) + if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) goto out_inode; d_instantiate(dentry, inode); diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 9e9931e2badd..f2386442adee 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -672,11 +672,11 @@ out: return ret; } -static struct super_block * +static int inotify_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); + return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA, mnt); } static struct file_system_type inotify_fs_type = { diff --git a/fs/ioprio.c b/fs/ioprio.c index ca77008146c0..7fa76ed53c10 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -24,15 +24,21 @@ #include <linux/blkdev.h> #include <linux/capability.h> #include <linux/syscalls.h> +#include <linux/security.h> static int set_task_ioprio(struct task_struct *task, int ioprio) { + int err; struct io_context *ioc; if (task->uid != current->euid && task->uid != current->uid && !capable(CAP_SYS_NICE)) return -EPERM; + err = security_task_setioprio(task, ioprio); + if (err) + return err; + task_lock(task); task->ioprio = ioprio; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 70adbb98bad1..3f9c8ba1fa1f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -56,7 +56,7 @@ static void isofs_put_super(struct super_block *sb) } static void isofs_read_inode(struct inode *); -static int isofs_statfs (struct super_block *, struct kstatfs *); +static int isofs_statfs (struct dentry *, struct kstatfs *); static kmem_cache_t *isofs_inode_cachep; @@ -901,8 +901,10 @@ out_freesbi: return -EINVAL; } -static int isofs_statfs (struct super_block *sb, struct kstatfs *buf) +static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; + buf->f_type = ISOFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = (ISOFS_SB(sb)->s_nzones @@ -1399,10 +1401,11 @@ struct inode *isofs_iget(struct super_block *sb, return inode; } -static struct super_block *isofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int isofs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, + mnt); } static struct file_system_type iso9660_fs_type = { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 3f5102b069db..47678a26c13b 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -24,29 +24,67 @@ #include <linux/slab.h> /* - * Unlink a buffer from a transaction. + * Unlink a buffer from a transaction checkpoint list. * * Called with j_list_lock held. */ - -static inline void __buffer_unlink(struct journal_head *jh) +static inline void __buffer_unlink_first(struct journal_head *jh) { - transaction_t *transaction; - - transaction = jh->b_cp_transaction; - jh->b_cp_transaction = NULL; + transaction_t *transaction = jh->b_cp_transaction; jh->b_cpnext->b_cpprev = jh->b_cpprev; jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) + if (transaction->t_checkpoint_list == jh) { transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; + } +} + +/* + * Unlink a buffer from a transaction checkpoint(io) list. + * + * Called with j_list_lock held. + */ +static inline void __buffer_unlink(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + if (transaction->t_checkpoint_io_list == jh) { + transaction->t_checkpoint_io_list = jh->b_cpnext; + if (transaction->t_checkpoint_io_list == jh) + transaction->t_checkpoint_io_list = NULL; + } +} + +/* + * Move a buffer from the checkpoint list to the checkpoint io list + * + * Called with j_list_lock held + */ +static inline void __buffer_relink_io(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + + if (!transaction->t_checkpoint_io_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_io_list; + jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_io_list = jh; } /* * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it. + * Returns 1 if we released it and 2 if we also released the + * whole transaction. + * * Requires j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ @@ -57,12 +95,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - __journal_remove_checkpoint(jh); + ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); - ret = 1; } else { jbd_unlock_bh_state(bh); } @@ -117,83 +154,54 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) } /* - * Clean up a transaction's checkpoint list. - * - * We wait for any pending IO to complete and make sure any clean - * buffers are removed from the transaction. - * - * Return 1 if we performed any actions which might have destroyed the - * checkpoint. (journal_remove_checkpoint() deletes the transaction when - * the last checkpoint buffer is cleansed) + * Clean up transaction's list of buffers submitted for io. + * We wait for any pending IO to complete and remove any clean + * buffers. Note that we take the buffers in the opposite ordering + * from the one in which they were submitted for IO. * * Called with j_list_lock held. */ -static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) +static void __wait_cp_io(journal_t *journal, transaction_t *transaction) { - struct journal_head *jh, *next_jh, *last_jh; + struct journal_head *jh; struct buffer_head *bh; - int ret = 0; - - assert_spin_locked(&journal->j_list_lock); - jh = transaction->t_checkpoint_list; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - next_jh = jh; - do { - jh = next_jh; + tid_t this_tid; + int released = 0; + + this_tid = transaction->t_tid; +restart: + /* Did somebody clean up the transaction in the meanwhile? */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + return; + while (!released && transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); + goto restart; + } if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); - goto out_return_1; + spin_lock(&journal->j_list_lock); + goto restart; } - /* - * This is foul + * Now in whatever state the buffer currently is, we know that + * it has been written out and so we can drop it from the list */ - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - goto out_return_1; - } - - if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - goto out_return_1; - } - - /* - * AKPM: I think the buffer_jbddirty test is redundant - it - * shouldn't have NULL b_transaction? - */ - next_jh = jh->b_cpnext; - if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - __brelse(bh); - ret = 1; - } else { - jbd_unlock_bh_state(bh); - } - } while (jh != last_jh); - - return ret; -out_return_1: - spin_lock(&journal->j_list_lock); - return 1; + released = __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + __brelse(bh); + } } #define NR_BATCH 64 @@ -203,9 +211,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - spin_unlock(&journal->j_list_lock); ll_rw_block(SWRITE, *batch_count, bhs); - spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); @@ -221,19 +227,43 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. * - * Called with j_list_lock held. + * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ -static int __flush_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - int *drop_count) +static int __process_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; - if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { - J_ASSERT_JH(jh, jh->b_transaction == NULL); + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + ret = 1; + } else if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + ret = 1; + } else if (!buffer_dirty(bh)) { + J_ASSERT_JH(jh, !buffer_jbddirty(bh)); + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + __brelse(bh); + ret = 1; + } else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. @@ -246,45 +276,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); bhs[*batch_count] = bh; + __buffer_relink_io(jh); jbd_unlock_bh_state(bh); (*batch_count)++; if (*batch_count == NR_BATCH) { + spin_unlock(&journal->j_list_lock); __flush_batch(journal, bhs, batch_count); ret = 1; } - } else { - int last_buffer = 0; - if (jh->b_cpnext == jh) { - /* We may be about to drop the transaction. Tell the - * caller that the lists have changed. - */ - last_buffer = 1; - } - if (__try_to_free_cp_buf(jh)) { - (*drop_count)++; - ret = last_buffer; - } } return ret; } /* - * Perform an actual checkpoint. We don't write out only enough to - * satisfy the current blocked requests: rather we submit a reasonably - * sized chunk of the outstanding data to disk at once for - * efficiency. __log_wait_for_space() will retry if we didn't free enough. + * Perform an actual checkpoint. We take the first transaction on the + * list of transactions to be checkpointed and send all its buffers + * to disk. We submit larger chunks of data at once. * - * However, we _do_ take into account the amount requested so that once - * the IO has been queued, we can return as soon as enough of it has - * completed to disk. - * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) { + transaction_t *transaction; + tid_t this_tid; int result; - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; jbd_debug(1, "Start checkpoint\n"); @@ -299,79 +314,68 @@ int log_do_checkpoint(journal_t *journal) return result; /* - * OK, we need to start writing disk blocks. Try to free up a - * quarter of the log in a single checkpoint if we can. + * OK, we need to start writing disk blocks. Take one transaction + * and write it. */ + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; +restart: /* - * AKPM: check this code. I had a feeling a while back that it - * degenerates into a busy loop at unmount time. + * If someone cleaned up this transaction while we slept, we're + * done (maybe it's a new transaction, but it fell at the same + * address). */ - spin_lock(&journal->j_list_lock); - while (journal->j_checkpoint_transactions) { - transaction_t *transaction; - struct journal_head *jh, *last_jh, *next_jh; - int drop_count = 0; - int cleanup_ret, retry = 0; - tid_t this_tid; - - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; - jh = transaction->t_checkpoint_list; - last_jh = jh->b_cpprev; - next_jh = jh; - do { + if (journal->j_checkpoint_transactions == transaction && + transaction->t_tid == this_tid) { + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + struct journal_head *jh; + int retry = 0; + + while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; - jh = next_jh; - next_jh = jh->b_cpnext; + jh = transaction->t_checkpoint_list; bh = jh2bh(jh); if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); retry = 1; break; } - retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); - if (cond_resched_lock(&journal->j_list_lock)) { + retry = __process_buffer(journal, jh, bhs,&batch_count); + if (!retry && lock_need_resched(&journal->j_list_lock)){ + spin_unlock(&journal->j_list_lock); retry = 1; break; } - } while (jh != last_jh && !retry); + } if (batch_count) { + if (!retry) { + spin_unlock(&journal->j_list_lock); + retry = 1; + } __flush_batch(journal, bhs, &batch_count); - retry = 1; } + if (retry) { + spin_lock(&journal->j_list_lock); + goto restart; + } /* - * If someone cleaned up this transaction while we slept, we're - * done - */ - if (journal->j_checkpoint_transactions != transaction) - break; - if (retry) - continue; - /* - * Maybe it's a new transaction, but it fell at the same - * address - */ - if (transaction->t_tid != this_tid) - continue; - /* - * We have walked the whole transaction list without - * finding anything to write to disk. We had better be - * able to make some progress or we are in trouble. + * Now we have cleaned up the first transaction's checkpoint + * list. Let's clean up the second one */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); - if (journal->j_checkpoint_transactions != transaction) - break; + __wait_cp_io(journal, transaction); } +out: spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) return result; - return 0; } @@ -456,52 +460,98 @@ int cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ /* + * journal_clean_one_cp_list + * + * Find all the written-back checkpoint buffers in the given list and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of bufers reaped (for debug) + */ + +static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +{ + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + int ret, freed = 0; + + *released = 0; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranking */ + if (jbd_trylock_bh_state(jh2bh(jh))) { + ret = __try_to_free_cp_buf(jh); + if (ret) { + freed++; + if (ret == 2) { + *released = 1; + return freed; + } + } + } + /* + * This function only frees up some memory + * if possible so we dont have an obligation + * to finish processing. Bail out if preemption + * requested: + */ + if (need_resched()) + return freed; + } while (jh != last_jh); + + return freed; +} + +/* * journal_clean_checkpoint_list * * Find all the written-back checkpoint buffers in the journal and release them. * * Called with the journal locked. * Called with j_list_lock held. - * Returns number of bufers reaped (for debug) + * Returns number of buffers reaped (for debug) */ int __journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; int ret = 0; + int released; transaction = journal->j_checkpoint_transactions; - if (transaction == 0) + if (!transaction) goto out; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { - struct journal_head *jh; - transaction = next_transaction; next_transaction = transaction->t_cpnext; - jh = transaction->t_checkpoint_list; - if (jh) { - struct journal_head *last_jh = jh->b_cpprev; - struct journal_head *next_jh = jh; - - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranknig */ - if (jbd_trylock_bh_state(jh2bh(jh))) - ret += __try_to_free_cp_buf(jh); - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - goto out; - } while (jh != last_jh); - } + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_list, &released); + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. Bail out if + * preemption requested: + */ + if (need_resched()) + goto out; + if (released) + continue; + /* + * It is essential that we are as careful as in the case of + * t_checkpoint_list with removing the buffer from the list as + * we can possibly see not yet submitted buffers on io_list + */ + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list, &released); + if (need_resched()) + goto out; } while (transaction != last_transaction); out: return ret; @@ -516,18 +566,22 @@ out: * buffer updates committed in that transaction have safely been stored * elsewhere on disk. To achieve this, all of the buffers in a * transaction need to be maintained on the transaction's checkpoint - * list until they have been rewritten, at which point this function is + * lists until they have been rewritten, at which point this function is * called to remove the buffer from the existing transaction's - * checkpoint list. + * checkpoint lists. + * + * The function returns 1 if it frees the transaction, 0 otherwise. * * This function is called with the journal locked. * This function is called with j_list_lock held. + * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -void __journal_remove_checkpoint(struct journal_head *jh) +int __journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; + int ret = 0; JBUFFER_TRACE(jh, "entry"); @@ -538,8 +592,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) journal = transaction->t_journal; __buffer_unlink(jh); + jh->b_cp_transaction = NULL; - if (transaction->t_checkpoint_list != NULL) + if (transaction->t_checkpoint_list != NULL || + transaction->t_checkpoint_io_list != NULL) goto out; JBUFFER_TRACE(jh, "transaction has no more buffers"); @@ -565,8 +621,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) /* Just in case anybody was waiting for more transactions to be checkpointed... */ wake_up(&journal->j_wait_logspace); + ret = 1; out: JBUFFER_TRACE(jh, "exit"); + return ret; } /* @@ -628,6 +686,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); + J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_updates == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 002ad2bbc769..0971814c38b8 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -790,11 +790,22 @@ restart_loop: jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); /* needs a brelse */ - release_buffer_page(bh); + /* The buffer on BJ_Forget list and not jbddirty means + * it has been freed by this transaction and hence it + * could not have been reallocated until this + * transaction has committed. *BUT* it could be + * reallocated once we have written all the data to + * disk and before we process the buffer on BJ_Forget + * list. */ + JBUFFER_TRACE(jh, "refile or unfile freed buffer"); + __journal_refile_buffer(jh); + if (!jh->b_transaction) { + jbd_unlock_bh_state(bh); + /* needs a brelse */ + journal_remove_journal_head(bh); + release_buffer_page(bh); + } else + jbd_unlock_bh_state(bh); } cond_resched_lock(&journal->j_list_lock); } diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 80d7f53fd0a7..de5bafb4e853 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -531,6 +531,7 @@ static int do_one_pass(journal_t *journal, default: jbd_debug(3, "Unrecognised magic %d, end of scan.\n", blocktype); + brelse(bh); goto done; } } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index c609f5034fcd..508b2ea91f43 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -227,7 +227,8 @@ repeat_locked: spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); out: - kfree(new_transaction); + if (unlikely(new_transaction)) /* It's usually NULL */ + kfree(new_transaction); return ret; } @@ -724,7 +725,8 @@ done: journal_cancel_revoke(handle, jh); out: - kfree(frozen_buffer); + if (unlikely(frozen_buffer)) /* It's usually NULL */ + kfree(frozen_buffer); JBUFFER_TRACE(jh, "exit"); return error; @@ -903,7 +905,8 @@ repeat: jbd_unlock_bh_state(bh); out: journal_put_journal_head(jh); - kfree(committed_data); + if (unlikely(committed_data)) + kfree(committed_data); return err; } @@ -2038,7 +2041,8 @@ void __journal_refile_buffer(struct journal_head *jh) __journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; - __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata); + __journal_file_buffer(jh, jh->b_transaction, + was_dirty ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 020cc097c539..9e46ea6da752 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -377,9 +377,9 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode, /* Get statistics of the file system. */ static int -jffs_statfs(struct super_block *sb, struct kstatfs *buf) +jffs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct jffs_control *c = (struct jffs_control *) sb->s_fs_info; + struct jffs_control *c = (struct jffs_control *) dentry->d_sb->s_fs_info; struct jffs_fmcontrol *fmc; lock_kernel(); @@ -1785,10 +1785,11 @@ static struct super_operations jffs_ops = .remount_fs = jffs_remount, }; -static struct super_block *jffs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int jffs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super, + mnt); } static struct file_system_type jffs_fs_type = { diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 7b6c24b14f85..2900ec3ec3af 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -192,9 +192,9 @@ int jffs2_setattr(struct dentry *dentry, struct iattr *iattr) return rc; } -int jffs2_statfs(struct super_block *sb, struct kstatfs *buf) +int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + struct jffs2_sb_info *c = JFFS2_SB_INFO(dentry->d_sb); unsigned long avail; buf->f_type = JFFS2_SUPER_MAGIC; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index cd4021bcb944..6b5223565405 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -175,7 +175,7 @@ void jffs2_clear_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); -int jffs2_statfs (struct super_block *, struct kstatfs *); +int jffs2_statfs (struct dentry *, struct kstatfs *); void jffs2_write_super (struct super_block *); int jffs2_remount_fs (struct super_block *, int *, char *); int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 9d0521451f59..2378a662c256 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -111,9 +111,10 @@ static int jffs2_sb_set(struct super_block *sb, void *data) return 0; } -static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct mtd_info *mtd) +static int jffs2_get_sb_mtd(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct mtd_info *mtd, + struct vfsmount *mnt) { struct super_block *sb; struct jffs2_sb_info *c; @@ -121,19 +122,20 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, c = kmalloc(sizeof(*c), GFP_KERNEL); if (!c) - return ERR_PTR(-ENOMEM); + return -ENOMEM; memset(c, 0, sizeof(*c)); c->mtd = mtd; sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c); if (IS_ERR(sb)) - goto out_put; + goto out_error; if (sb->s_root) { /* New mountpoint for JFFS2 which is already mounted */ D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n", mtd->index, mtd->name)); + ret = simple_set_mnt(mnt, sb); goto out_put; } @@ -161,44 +163,47 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, /* Failure case... */ up_write(&sb->s_umount); deactivate_super(sb); - return ERR_PTR(ret); + return ret; } sb->s_flags |= MS_ACTIVE; - return sb; + return simple_set_mnt(mnt, sb); +out_error: + ret = PTR_ERR(sb); out_put: kfree(c); put_mtd_device(mtd); - return sb; + return ret; } -static struct super_block *jffs2_get_sb_mtdnr(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, int mtdnr) +static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, int mtdnr, + struct vfsmount *mnt) { struct mtd_info *mtd; mtd = get_mtd_device(NULL, mtdnr); if (!mtd) { D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr)); - return ERR_PTR(-EINVAL); + return -EINVAL; } - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd); + return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); } -static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int jffs2_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { int err; struct nameidata nd; int mtdnr; if (!dev_name) - return ERR_PTR(-EINVAL); + return -EINVAL; D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name)); @@ -220,7 +225,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, mtd = get_mtd_device(NULL, mtdnr); if (mtd) { if (!strcmp(mtd->name, dev_name+4)) - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd); + return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); put_mtd_device(mtd); } } @@ -233,7 +238,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, if (!*endptr) { /* It was a valid number */ D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr)); - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr); + return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); } } } @@ -247,7 +252,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, err, nd.dentry->d_inode)); if (err) - return ERR_PTR(err); + return err; err = -EINVAL; @@ -269,11 +274,11 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, mtdnr = iminor(nd.dentry->d_inode); path_release(&nd); - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr); + return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); out: path_release(&nd); - return ERR_PTR(err); + return err; } static void jffs2_put_super (struct super_block *sb) diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 2b220dd6b4e7..7f6e88039700 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -632,10 +632,9 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, } SetPageUptodate(page); } else { - page = read_cache_page(mapping, page_index, - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, page_index, NULL); if (IS_ERR(page) || !PageUptodate(page)) { - jfs_err("read_cache_page failed!"); + jfs_err("read_mapping_page failed!"); return NULL; } lock_page(page); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index db6f41d6dd60..73d2aba084c6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -139,9 +139,9 @@ static void jfs_destroy_inode(struct inode *inode) kmem_cache_free(jfs_inode_cachep, ji); } -static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); s64 maxinodes; struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; @@ -565,10 +565,11 @@ static void jfs_unlockfs(struct super_block *sb) } } -static struct super_block *jfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int jfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, + mnt); } static int jfs_sync_fs(struct super_block *sb, int wait) diff --git a/fs/libfs.c b/fs/libfs.c index 7145ba7a48d0..fc785d8befb9 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,9 +20,9 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -int simple_statfs(struct super_block *sb, struct kstatfs *buf) +int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { - buf->f_type = sb->s_magic; + buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = PAGE_CACHE_SIZE; buf->f_namelen = NAME_MAX; return 0; @@ -196,9 +196,9 @@ struct inode_operations simple_dir_inode_operations = { * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ -struct super_block * -get_sb_pseudo(struct file_system_type *fs_type, char *name, - struct super_operations *ops, unsigned long magic) +int get_sb_pseudo(struct file_system_type *fs_type, char *name, + struct super_operations *ops, unsigned long magic, + struct vfsmount *mnt) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); static struct super_operations default_ops = {.statfs = simple_statfs}; @@ -207,7 +207,7 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name, struct qstr d_name = {.name = name, .len = strlen(name)}; if (IS_ERR(s)) - return s; + return PTR_ERR(s); s->s_flags = MS_NOUSER; s->s_maxbytes = ~0ULL; @@ -232,12 +232,12 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); Enomem: up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -424,13 +424,13 @@ out: static DEFINE_SPINLOCK(pin_fs_lock); -int simple_pin_fs(char *name, struct vfsmount **mount, int *count) +int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) { struct vfsmount *mnt = NULL; spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = do_kern_mount(name, 0, name, NULL); + mnt = vfs_kern_mount(type, 0, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index bce744468708..52774feab93f 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -147,11 +147,10 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, * that we mark locks for reclaiming, and that we bump the pseudo NSM state. */ -static inline -void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) +static void nlmclnt_prepare_reclaim(struct nlm_host *host) { + down_write(&host->h_rwsem); host->h_monitored = 0; - host->h_nsmstate = newstate; host->h_state++; host->h_nextrebind = 0; nlm_rebind_host(host); @@ -164,6 +163,13 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) dprintk("NLM: reclaiming locks for host %s", host->h_name); } +static void nlmclnt_finish_reclaim(struct nlm_host *host) +{ + host->h_reclaiming = 0; + up_write(&host->h_rwsem); + dprintk("NLM: done reclaiming locks for host %s", host->h_name); +} + /* * Reclaim all locks on server host. We do this by spawning a separate * reclaimer thread. @@ -171,12 +177,10 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) void nlmclnt_recovery(struct nlm_host *host, u32 newstate) { - if (host->h_reclaiming++) { - if (host->h_nsmstate == newstate) - return; - nlmclnt_prepare_reclaim(host, newstate); - } else { - nlmclnt_prepare_reclaim(host, newstate); + if (host->h_nsmstate == newstate) + return; + host->h_nsmstate = newstate; + if (!host->h_reclaiming++) { nlm_get_host(host); __module_get(THIS_MODULE); if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0) @@ -190,6 +194,7 @@ reclaimer(void *ptr) struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; struct file_lock *fl, *next; + u32 nsmstate; daemonize("%s-reclaim", host->h_name); allow_signal(SIGKILL); @@ -199,19 +204,25 @@ reclaimer(void *ptr) lock_kernel(); lockd_up(); + nlmclnt_prepare_reclaim(host); /* First, reclaim all locks that have been marked. */ restart: + nsmstate = host->h_nsmstate; list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { list_del_init(&fl->fl_u.nfs_fl.list); if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) == 0) - list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); - goto restart; + if (nlmclnt_reclaim(host, fl) != 0) + continue; + list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); + if (host->h_nsmstate != nsmstate) { + /* Argh! The server rebooted again! */ + list_splice_init(&host->h_granted, &host->h_reclaim); + goto restart; + } } - - host->h_reclaiming = 0; + nlmclnt_finish_reclaim(host); /* Now, wake up all processes that sleep on a blocked lock */ list_for_each_entry(block, &nlm_blocked, b_list) { diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index f96e38155b5c..4db62098d3f4 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -508,7 +508,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } block = nlmclnt_prepare_block(host, fl); +again: for(;;) { + /* Reboot protection */ + fl->fl_u.nfs_fl.state = host->h_state; status = nlmclnt_call(req, NLMPROC_LOCK); if (status < 0) goto out_unblock; @@ -531,10 +534,16 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } if (resp->status == NLM_LCK_GRANTED) { - fl->fl_u.nfs_fl.state = host->h_state; + down_read(&host->h_rwsem); + /* Check whether or not the server has rebooted */ + if (fl->fl_u.nfs_fl.state != host->h_state) { + up_read(&host->h_rwsem); + goto again; + } fl->fl_flags |= FL_SLEEP; /* Ensure the resulting lock will get added to granted list */ do_vfs_lock(fl); + up_read(&host->h_rwsem); } status = nlm_stat_to_errno(resp->status); out_unblock: @@ -596,6 +605,7 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) static int nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) { + struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; int status; @@ -604,7 +614,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either * case, we want to unlock. */ + down_read(&host->h_rwsem); do_vfs_lock(fl); + up_read(&host->h_rwsem); if (req->a_flags & RPC_TASK_ASYNC) return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 729ac427d359..38b0e8a1aec0 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -112,11 +112,12 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, host->h_version = version; host->h_proto = proto; host->h_rpcclnt = NULL; - init_MUTEX(&host->h_sema); + mutex_init(&host->h_mutex); host->h_nextrebind = jiffies + NLM_HOST_REBIND; host->h_expires = jiffies + NLM_HOST_EXPIRE; atomic_set(&host->h_count, 1); init_waitqueue_head(&host->h_gracewait); + init_rwsem(&host->h_rwsem); host->h_state = 0; /* pseudo NSM state */ host->h_nsmstate = 0; /* real NSM state */ host->h_server = server; @@ -172,7 +173,7 @@ nlm_bind_host(struct nlm_host *host) (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); /* Lock host handle */ - down(&host->h_sema); + mutex_lock(&host->h_mutex); /* If we've already created an RPC client, check whether * RPC rebind is required @@ -204,12 +205,12 @@ nlm_bind_host(struct nlm_host *host) host->h_rpcclnt = clnt; } - up(&host->h_sema); + mutex_unlock(&host->h_mutex); return clnt; forgetit: printk("lockd: couldn't create RPC handle for %s\n", host->h_name); - up(&host->h_sema); + mutex_unlock(&host->h_mutex); return NULL; } diff --git a/fs/locks.c b/fs/locks.c index ab61a8b54829..1ad29c9b6252 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -703,7 +703,7 @@ EXPORT_SYMBOL(posix_test_lock); * from a broken NFS client. But broken NFS clients have a lot more to * worry about than proper deadlock detection anyway... --okir */ -int posix_locks_deadlock(struct file_lock *caller_fl, +static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { struct list_head *tmp; @@ -722,8 +722,6 @@ next_task: return 0; } -EXPORT_SYMBOL(posix_locks_deadlock); - /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * at the head of the list, but that's secret knowledge known only to * flock_lock_file and posix_lock_file. @@ -794,7 +792,8 @@ out: static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { struct file_lock *fl; - struct file_lock *new_fl, *new_fl2; + struct file_lock *new_fl = NULL; + struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; struct file_lock **before; @@ -803,9 +802,15 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. + * + * In some cases we can be sure, that no new locks will be needed */ - new_fl = locks_alloc_lock(); - new_fl2 = locks_alloc_lock(); + if (!(request->fl_flags & FL_ACCESS) && + (request->fl_type != F_UNLCK || + request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { + new_fl = locks_alloc_lock(); + new_fl2 = locks_alloc_lock(); + } lock_kernel(); if (request->fl_type != F_UNLCK) { @@ -834,14 +839,7 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request if (request->fl_flags & FL_ACCESS) goto out; - error = -ENOLCK; /* "no luck" */ - if (!(new_fl && new_fl2)) - goto out; - /* - * We've allocated the new locks in advance, so there are no - * errors possible (and no blocking operations) from here on. - * * Find the first old lock with the same owner as the new lock. */ @@ -938,10 +936,25 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request before = &fl->fl_next; } + /* + * The above code only modifies existing locks in case of + * merging or replacing. If new lock(s) need to be inserted + * all modifications are done bellow this, so it's safe yet to + * bail out. + */ + error = -ENOLCK; /* "no luck" */ + if (right && left == right && !new_fl2) + goto out; + error = 0; if (!added) { if (request->fl_type == F_UNLCK) goto out; + + if (!new_fl) { + error = -ENOLCK; + goto out; + } locks_copy_lock(new_fl, request); locks_insert_lock(before, new_fl); new_fl = NULL; @@ -1881,19 +1894,18 @@ out: */ void locks_remove_posix(struct file *filp, fl_owner_t owner) { - struct file_lock lock, **before; + struct file_lock lock; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - before = &filp->f_dentry->d_inode->i_flock; - if (*before == NULL) + if (!filp->f_dentry->d_inode->i_flock) return; lock.fl_type = F_UNLCK; - lock.fl_flags = FL_POSIX; + lock.fl_flags = FL_POSIX | FL_CLOSE; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; lock.fl_owner = owner; @@ -1902,25 +1914,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) lock.fl_ops = NULL; lock.fl_lmops = NULL; - if (filp->f_op && filp->f_op->lock != NULL) { + if (filp->f_op && filp->f_op->lock != NULL) filp->f_op->lock(filp, F_SETLK, &lock); - goto out; - } + else + posix_lock_file(filp, &lock); - /* Can't use posix_lock_file here; we need to remove it no matter - * which pid we have. - */ - lock_kernel(); - while (*before != NULL) { - struct file_lock *fl = *before; - if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) { - locks_delete_lock(before); - continue; - } - before = &fl->fl_next; - } - unlock_kernel(); -out: if (lock.fl_ops && lock.fl_ops->fl_release_private) lock.fl_ops->fl_release_private(&lock); } @@ -2206,63 +2204,6 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) EXPORT_SYMBOL(lock_may_write); -static inline void __steal_locks(struct file *file, fl_owner_t from) -{ - struct inode *inode = file->f_dentry->d_inode; - struct file_lock *fl = inode->i_flock; - - while (fl) { - if (fl->fl_file == file && fl->fl_owner == from) - fl->fl_owner = current->files; - fl = fl->fl_next; - } -} - -/* When getting ready for executing a binary, we make sure that current - * has a files_struct on its own. Before dropping the old files_struct, - * we take over ownership of all locks for all file descriptors we own. - * Note that we may accidentally steal a lock for a file that a sibling - * has created since the unshare_files() call. - */ -void steal_locks(fl_owner_t from) -{ - struct files_struct *files = current->files; - int i, j; - struct fdtable *fdt; - - if (from == files) - return; - - lock_kernel(); - j = 0; - - /* - * We are not taking a ref to the file structures, so - * we need to acquire ->file_lock. - */ - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - for (;;) { - unsigned long set; - i = j * __NFDBITS; - if (i >= fdt->max_fdset || i >= fdt->max_fds) - break; - set = fdt->open_fds->fds_bits[j++]; - while (set) { - if (set & 1) { - struct file *file = fdt->fd[i]; - if (file) - __steal_locks(file, from); - } - i++; - set >>= 1; - } - } - spin_unlock(&files->file_lock); - unlock_kernel(); -} -EXPORT_SYMBOL(steal_locks); - static int __init filelock_init(void) { filelock_cache = kmem_cache_create("file_lock_cache", diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 69224d1fe043..2b0a389d1987 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -60,8 +60,7 @@ static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) static struct page * dir_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_cache_page(mapping, n, - (filler_t*)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); kmap(page); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2dcccf1d1b7f..a6fb509b7341 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -19,7 +19,7 @@ static void minix_read_inode(struct inode * inode); static int minix_write_inode(struct inode * inode, int wait); -static int minix_statfs(struct super_block *sb, struct kstatfs *buf); +static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); static void minix_delete_inode(struct inode *inode) @@ -296,11 +296,11 @@ out_bad_sb: return -EINVAL; } -static int minix_statfs(struct super_block *sb, struct kstatfs *buf) +static int minix_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct minix_sb_info *sbi = minix_sb(sb); - buf->f_type = sb->s_magic; - buf->f_bsize = sb->s_blocksize; + struct minix_sb_info *sbi = minix_sb(dentry->d_sb); + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size; buf->f_bfree = minix_count_free_blocks(sbi); buf->f_bavail = buf->f_bfree; @@ -559,10 +559,11 @@ void minix_truncate(struct inode * inode) V2_minix_truncate(inode); } -static struct super_block *minix_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int minix_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super, + mnt); } static struct file_system_type minix_fs_type = { diff --git a/fs/mpage.c b/fs/mpage.c index 9bf2eb30e6f4..1e4598247d0b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -707,9 +707,9 @@ mpage_writepages(struct address_space *mapping, struct pagevec pvec; int nr_pages; pgoff_t index; - pgoff_t end = -1; /* Inclusive */ + pgoff_t end; /* Inclusive */ int scanned = 0; - int is_range = 0; + int range_whole = 0; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; @@ -721,16 +721,14 @@ mpage_writepages(struct address_space *mapping, writepage = mapping->a_ops->writepage; pagevec_init(&pvec, 0); - if (wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ + end = -1; } else { - index = 0; /* whole-file sweep */ - scanned = 1; - } - if (wbc->start || wbc->end) { - index = wbc->start >> PAGE_CACHE_SHIFT; - end = wbc->end >> PAGE_CACHE_SHIFT; - is_range = 1; + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; scanned = 1; } retry: @@ -759,7 +757,7 @@ retry: continue; } - if (unlikely(is_range) && page->index > end) { + if (!wbc->range_cyclic && page->index > end) { done = 1; unlock_page(page); continue; @@ -810,7 +808,7 @@ retry: index = 0; goto retry; } - if (!is_range) + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; if (bio) mpage_bio_submit(WRITE, bio); diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 5b76ccd19e3f..9e44158a7540 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -661,11 +661,12 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *msdos_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int msdos_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, + mnt); } static struct file_system_type msdos_fs_type = { diff --git a/fs/namei.c b/fs/namei.c index 184fe4acf824..c784e8bb57a3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2243,14 +2243,16 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, int error; char * to; - if (flags != 0) + if ((flags & ~AT_SYMLINK_FOLLOW) != 0) return -EINVAL; to = getname(newname); if (IS_ERR(to)) return PTR_ERR(to); - error = __user_walk_fd(olddfd, oldname, 0, &old_nd); + error = __user_walk_fd(olddfd, oldname, + flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, + &old_nd); if (error) goto exit; error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); @@ -2577,8 +2579,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) { struct page * page; struct address_space *mapping = dentry->d_inode->i_mapping; - page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, - NULL); + page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) goto sync_fail; wait_on_page_locked(page); diff --git a/fs/namespace.c b/fs/namespace.c index bf478addb852..866430bb024d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -86,6 +86,15 @@ struct vfsmount *alloc_vfsmnt(const char *name) return mnt; } +int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) +{ + mnt->mnt_sb = sb; + mnt->mnt_root = dget(sb->s_root); + return 0; +} + +EXPORT_SYMBOL(simple_set_mnt); + void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); @@ -576,8 +585,8 @@ static int do_umount(struct vfsmount *mnt, int flags) */ lock_kernel(); - if ((flags & MNT_FORCE) && sb->s_op->umount_begin) - sb->s_op->umount_begin(sb); + if (sb->s_op->umount_begin) + sb->s_op->umount_begin(mnt, flags); unlock_kernel(); /* @@ -1163,13 +1172,46 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, } /* + * go through the vfsmounts we've just consigned to the graveyard to + * - check that they're still dead + * - delete the vfsmount from the appropriate namespace under lock + * - dispose of the corpse + */ +static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts) +{ + struct namespace *namespace; + struct vfsmount *mnt; + + while (!list_empty(graveyard)) { + LIST_HEAD(umounts); + mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire); + list_del_init(&mnt->mnt_expire); + + /* don't do anything if the namespace is dead - all the + * vfsmounts from it are going away anyway */ + namespace = mnt->mnt_namespace; + if (!namespace || !namespace->root) + continue; + get_namespace(namespace); + + spin_unlock(&vfsmount_lock); + down_write(&namespace_sem); + expire_mount(mnt, mounts, &umounts); + up_write(&namespace_sem); + release_mounts(&umounts); + mntput(mnt); + put_namespace(namespace); + spin_lock(&vfsmount_lock); + } +} + +/* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came * here */ void mark_mounts_for_expiry(struct list_head *mounts) { - struct namespace *namespace; struct vfsmount *mnt, *next; LIST_HEAD(graveyard); @@ -1193,38 +1235,79 @@ void mark_mounts_for_expiry(struct list_head *mounts) list_move(&mnt->mnt_expire, &graveyard); } - /* - * go through the vfsmounts we've just consigned to the graveyard to - * - check that they're still dead - * - delete the vfsmount from the appropriate namespace under lock - * - dispose of the corpse - */ - while (!list_empty(&graveyard)) { - LIST_HEAD(umounts); - mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); - list_del_init(&mnt->mnt_expire); + expire_mount_list(&graveyard, mounts); - /* don't do anything if the namespace is dead - all the - * vfsmounts from it are going away anyway */ - namespace = mnt->mnt_namespace; - if (!namespace || !namespace->root) + spin_unlock(&vfsmount_lock); +} + +EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); + +/* + * Ripoff of 'select_parent()' + * + * search the list of submounts for a given mountpoint, and move any + * shrinkable submounts to the 'graveyard' list. + */ +static int select_submounts(struct vfsmount *parent, struct list_head *graveyard) +{ + struct vfsmount *this_parent = parent; + struct list_head *next; + int found = 0; + +repeat: + next = this_parent->mnt_mounts.next; +resume: + while (next != &this_parent->mnt_mounts) { + struct list_head *tmp = next; + struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child); + + next = tmp->next; + if (!(mnt->mnt_flags & MNT_SHRINKABLE)) continue; - get_namespace(namespace); + /* + * Descend a level if the d_mounts list is non-empty. + */ + if (!list_empty(&mnt->mnt_mounts)) { + this_parent = mnt; + goto repeat; + } - spin_unlock(&vfsmount_lock); - down_write(&namespace_sem); - expire_mount(mnt, mounts, &umounts); - up_write(&namespace_sem); - release_mounts(&umounts); - mntput(mnt); - put_namespace(namespace); - spin_lock(&vfsmount_lock); + if (!propagate_mount_busy(mnt, 1)) { + mntget(mnt); + list_move_tail(&mnt->mnt_expire, graveyard); + found++; + } } + /* + * All done at this level ... ascend and resume the search + */ + if (this_parent != parent) { + next = this_parent->mnt_child.next; + this_parent = this_parent->mnt_parent; + goto resume; + } + return found; +} + +/* + * process a list of expirable mountpoints with the intent of discarding any + * submounts of a specific parent mountpoint + */ +void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) +{ + LIST_HEAD(graveyard); + int found; + + spin_lock(&vfsmount_lock); + + /* extract submounts of 'mountpoint' from the expiration list */ + while ((found = select_submounts(mountpoint, &graveyard)) != 0) + expire_mount_list(&graveyard, mounts); spin_unlock(&vfsmount_lock); } -EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); +EXPORT_SYMBOL_GPL(shrink_submounts); /* * Some copy_from_user() implementations do not return the exact number of diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index a1f3e972c6ef..90d2ea28f333 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -39,7 +39,7 @@ static void ncp_delete_inode(struct inode *); static void ncp_put_super(struct super_block *); -static int ncp_statfs(struct super_block *, struct kstatfs *); +static int ncp_statfs(struct dentry *, struct kstatfs *); static kmem_cache_t * ncp_inode_cachep; @@ -724,13 +724,14 @@ static void ncp_put_super(struct super_block *sb) kfree(server); } -static int ncp_statfs(struct super_block *sb, struct kstatfs *buf) +static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) { struct dentry* d; struct inode* i; struct ncp_inode_info* ni; struct ncp_server* s; struct ncp_volume_info vi; + struct super_block *sb = dentry->d_sb; int err; __u8 dh; @@ -957,10 +958,10 @@ out: return result; } -static struct super_block *ncp_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ncp_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, ncp_fill_super); + return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); } static struct file_system_type ncp_fs_type = { diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ec61fd56a1a9..0b572a0c1967 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,14 +4,16 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o +nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ + proc.o read.o symlink.o unlink.o write.o \ + namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o + callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-objs := $(nfs-y) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 90c95adc8c1b..d53f8c6a9ecb 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -182,8 +182,6 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) /* * Define NFS4 callback program */ -extern struct svc_version nfs4_callback_version1; - static struct svc_version *nfs4_callback_version[] = { [1] = &nfs4_callback_version1, }; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 05c38cf40b69..c92991328d9a 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -202,7 +202,7 @@ static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xd status = decode_fh(xdr, &args->fh); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, status); - return 0; + return status; } static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cae74dd4c7f5..3ddda6f7ecc2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -528,7 +528,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) lock_kernel(); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) { unlock_kernel(); return res; @@ -868,6 +868,17 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } +static inline int nfs_reval_fsid(struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) + /* Revalidate fsid on root dir */ + return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -900,6 +911,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } + error = nfs_reval_fsid(dir, &fhandle, &fattr); + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; + } inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); res = (struct dentry *)inode; if (IS_ERR(res)) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3c72b0c07283..402005c35ab3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -892,7 +892,7 @@ out: * nfs_init_directcache - create a slab cache for nfs_direct_req structures * */ -int nfs_init_directcache(void) +int __init nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", sizeof(struct nfs_direct_req), @@ -906,10 +906,10 @@ int nfs_init_directcache(void) } /** - * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures + * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures * */ -void nfs_destroy_directcache(void) +void __exit nfs_destroy_directcache(void) { if (kmem_cache_destroy(nfs_direct_cachep)) printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n"); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index fade02c15e6e..add289138836 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -43,7 +43,7 @@ static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); -static int nfs_file_flush(struct file *); +static int nfs_file_flush(struct file *, fl_owner_t id); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); @@ -127,23 +127,6 @@ nfs_file_release(struct inode *inode, struct file *filp) } /** - * nfs_revalidate_file - Revalidate the page cache & related metadata - * @inode - pointer to inode struct - * @file - pointer to file - */ -static int nfs_revalidate_file(struct inode *inode, struct file *filp) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int retval = 0; - - if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)) - || nfs_attribute_timeout(inode)) - retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - nfs_revalidate_mapping(inode, filp->f_mapping); - return 0; -} - -/** * nfs_revalidate_size - Revalidate the file size * @inode - pointer to inode struct * @file - pointer to struct file @@ -188,7 +171,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) * */ static int -nfs_file_flush(struct file *file) +nfs_file_flush(struct file *file, fl_owner_t id) { struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = file->f_dentry->d_inode; @@ -228,7 +211,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_file(inode, iocb->ki_filp); + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); @@ -247,7 +230,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_file(inode, filp); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -263,7 +246,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_file(inode, file); + status = nfs_revalidate_mapping(inode, file->f_mapping); if (!status) status = generic_file_mmap(file, vma); return status; @@ -320,7 +303,11 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse static void nfs_invalidate_page(struct page *page, unsigned long offset) { - /* FIXME: we really should cancel any unstarted writes on this page */ + struct inode *inode = page->mapping->host; + + /* Cancel any unstarted writes on this page */ + if (offset == 0) + nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE); } static int nfs_release_page(struct page *page, gfp_t gfp) @@ -373,7 +360,6 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t if (result) goto out; } - nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 3fab5b0cfc5a..b81e7ed3c902 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -47,7 +47,6 @@ #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> -#include <linux/nfs_fs_sb.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d0b991a92327..51bc88b662fe 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -36,6 +36,8 @@ #include <linux/mount.h> #include <linux/nfs_idmap.h> #include <linux/vfs.h> +#include <linux/inet.h> +#include <linux/nfs_xdr.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -44,89 +46,17 @@ #include "callback.h" #include "delegation.h" #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. - */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) - static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); -static struct inode *nfs_alloc_inode(struct super_block *sb); -static void nfs_destroy_inode(struct inode *); -static int nfs_write_inode(struct inode *,int); -static void nfs_delete_inode(struct inode *); -static void nfs_clear_inode(struct inode *); -static void nfs_umount_begin(struct super_block *); -static int nfs_statfs(struct super_block *, struct kstatfs *); -static int nfs_show_options(struct seq_file *, struct vfsmount *); -static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); -static struct rpc_program nfs_program; - -static struct super_operations nfs_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .delete_inode = nfs_delete_inode, - .statfs = nfs_statfs, - .clear_inode = nfs_clear_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_stats = nfs_show_stats, -}; - -/* - * RPC cruft for NFS - */ -static struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; - -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = ARRAY_SIZE(nfs_version), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ +static kmem_cache_t * nfs_inode_cachep; static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) @@ -134,8 +64,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -static int -nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, int sync) { int flags = sync ? FLUSH_SYNC : 0; int ret; @@ -146,31 +75,15 @@ nfs_write_inode(struct inode *inode, int sync) return 0; } -static void -nfs_delete_inode(struct inode * inode) +void nfs_clear_inode(struct inode *inode) { - dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); - - truncate_inode_pages(&inode->i_data, 0); + struct nfs_inode *nfsi = NFS_I(inode); + struct rpc_cred *cred; - nfs_wb_all(inode); /* * The following should never happen... */ - if (nfs_have_writebacks(inode)) { - printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); - } - - clear_inode(inode); -} - -static void -nfs_clear_inode(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred; - - nfs_wb_all(inode); + BUG_ON(nfs_have_writebacks(inode)); BUG_ON (!list_empty(&nfsi->open_files)); nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; @@ -179,554 +92,6 @@ nfs_clear_inode(struct inode *inode) BUG_ON(atomic_read(&nfsi->data_updates) != 0); } -void -nfs_umount_begin(struct super_block *sb) -{ - struct rpc_clnt *rpc = NFS_SB(sb)->client; - - /* -EIO all pending I/O */ - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = NFS_SB(sb)->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); -} - - -static inline unsigned long -nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) -{ - /* make sure blocksize is a power of two */ - if ((bsize & (bsize - 1)) || nrbitsp) { - unsigned char nrbits; - - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) - ; - bsize = 1 << nrbits; - if (nrbitsp) - *nrbitsp = nrbits; - } - - return bsize; -} - -/* - * Calculate the number of 512byte blocks used. - */ -static inline unsigned long -nfs_calc_block_size(u64 tsize) -{ - loff_t used = (tsize + 511) >> 9; - return (used > ULONG_MAX) ? ULONG_MAX : used; -} - -/* - * Compute and set NFS server blocksize - */ -static inline unsigned long -nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) -{ - if (bsize < NFS_MIN_FILE_IO_SIZE) - bsize = NFS_DEF_FILE_IO_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_SIZE) - bsize = NFS_MAX_FILE_IO_SIZE; - - return nfs_block_bits(bsize, nrbitsp); -} - -/* - * Obtain the root inode of the file system. - */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) -{ - struct nfs_server *server = NFS_SB(sb); - int error; - - error = server->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); - } - - return nfs_fhget(sb, rootfh, fsinfo->fattr); -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - return -ENOMEM; - - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? */ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; - } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; - } - sb->s_root->d_op = server->rpc_ops->dentry_ops; - - /* mount time stamp, in seconds */ - server->mount_time = jiffies; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->rsize > NFS_MAX_FILE_IO_SIZE) - server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - if (server->wsize > NFS_MAX_FILE_IO_SIZE) - server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags |= MS_SYNCHRONOUS; - } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; - - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; - - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); - return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; -} - -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) -{ - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} - -/* - * Create an RPC client handle. - */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) -{ - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - return (struct rpc_clnt *)xprt; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; - } - - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - - return clnt; - -out_fail: - return clnt; -} - -/* - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. - */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) -{ - struct nfs_server *server; - rpc_authflavor_t authflavor; - - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. */ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif - - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } - if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; - sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; - } - - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); -} - -static int -nfs_statfs(struct super_block *sb, struct kstatfs *buf) -{ - struct nfs_server *server = NFS_SB(sb); - unsigned char blockbits; - unsigned long blockres; - struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); - struct nfs_fattr fattr; - struct nfs_fsstat res = { - .fattr = &fattr, - }; - int error; - - lock_kernel(); - - error = server->rpc_ops->statfs(server, rootfh, &res); - buf->f_type = NFS_SUPER_MAGIC; - if (error < 0) - goto out_err; - - /* - * Current versions of glibc do not correctly handle the - * case where f_frsize != f_bsize. Eventually we want to - * report the value of wtmult in this field. - */ - buf->f_frsize = sb->s_blocksize; - - /* - * On most *nix systems, f_blocks, f_bfree, and f_bavail - * are reported in units of f_frsize. Linux hasn't had - * an f_frsize field in its statfs struct until recently, - * thus historically Linux's sys_statfs reports these - * fields in units of f_bsize. - */ - buf->f_bsize = sb->s_blocksize; - blockbits = sb->s_blocksize_bits; - blockres = (1 << blockbits) - 1; - buf->f_blocks = (res.tbytes + blockres) >> blockbits; - buf->f_bfree = (res.fbytes + blockres) >> blockbits; - buf->f_bavail = (res.abytes + blockres) >> blockbits; - - buf->f_files = res.tfiles; - buf->f_ffree = res.afiles; - - buf->f_namelen = server->namelen; - out: - unlock_kernel(); - return 0; - - out_err: - dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); - buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; - goto out; - -} - -static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) -{ - static struct proc_nfs_info { - int flag; - char *str; - char *nostr; - } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_NOCTO, ",nocto", "" }, - { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", "" }, - { NFS_MOUNT_NOACL, ",noacl", "" }, - { 0, NULL, NULL } - }; - struct proc_nfs_info *nfs_infop; - char buf[12]; - char *proto; - - seq_printf(m, ",vers=%d", nfss->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ || showdefaults) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ || showdefaults) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ || showdefaults) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ || showdefaults) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); - for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - if (nfss->flags & nfs_infop->flag) - seq_puts(m, nfs_infop->str); - else - seq_puts(m, nfs_infop->nostr); - } - switch (nfss->client->cl_xprt->prot) { - case IPPROTO_TCP: - proto = "tcp"; - break; - case IPPROTO_UDP: - proto = "udp"; - break; - default: - snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); - proto = buf; - } - seq_printf(m, ",proto=%s", proto); - seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", nfss->retrans_count); -} - -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) -{ - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - - nfs_show_mount_options(m, nfss, 0); - - seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); - - return 0; -} - -static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) -{ - int i, cpu; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - struct rpc_auth *auth = nfss->client->cl_auth; - struct nfs_iostats totals = { }; - - seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); - - /* - * Display all mount option settings - */ - seq_printf(m, "\n\topts:\t"); - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); - seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); - seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); - seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); - nfs_show_mount_options(m, nfss, 1); - - seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); - - seq_printf(m, "\n\tcaps:\t"); - seq_printf(m, "caps=0x%x", nfss->caps); - seq_printf(m, ",wtmult=%d", nfss->wtmult); - seq_printf(m, ",dtsize=%d", nfss->dtsize); - seq_printf(m, ",bsize=%d", nfss->bsize); - seq_printf(m, ",namelen=%d", nfss->namelen); - -#ifdef CONFIG_NFS_V4 - if (nfss->rpc_ops->version == 4) { - seq_printf(m, "\n\tnfsv4:\t"); - seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); - seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); - seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); - } -#endif - - /* - * Display security flavor in effect for this mount - */ - seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); - if (auth->au_flavor) - seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); - - /* - * Display superblock I/O counters - */ - for_each_possible_cpu(cpu) { - struct nfs_iostats *stats; - - preempt_disable(); - stats = per_cpu_ptr(nfss->io_stats, cpu); - - for (i = 0; i < __NFSIOS_COUNTSMAX; i++) - totals.events[i] += stats->events[i]; - for (i = 0; i < __NFSIOS_BYTESMAX; i++) - totals.bytes[i] += stats->bytes[i]; - - preempt_enable(); - } - - seq_printf(m, "\n\tevents:\t"); - for (i = 0; i < __NFSIOS_COUNTSMAX; i++) - seq_printf(m, "%lu ", totals.events[i]); - seq_printf(m, "\n\tbytes:\t"); - for (i = 0; i < __NFSIOS_BYTESMAX; i++) - seq_printf(m, "%Lu ", totals.bytes[i]); - seq_printf(m, "\n"); - - rpc_print_iostats(m, nfss->client); - - return 0; -} - /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk */ @@ -889,6 +254,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); + /* Deal with crossing mountpoints */ + if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + inode->i_op = &nfs_referral_inode_operations; + else + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -1207,6 +580,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); lock_kernel(); if (!inode || is_bad_inode(inode)) goto out_nowait; @@ -1220,7 +594,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = -ESTALE; /* Do we trust the cached ESTALE? */ if (NFS_ATTRTIMEO(inode) != 0) { - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) { /* no */ } else goto out; @@ -1251,8 +625,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } spin_unlock(&inode->i_lock); - nfs_revalidate_mapping(inode, inode->i_mapping); - if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); @@ -1286,8 +658,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); @@ -1298,9 +669,16 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) * @inode - pointer to host inode * @mapping - pointer to mapping */ -void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); + int ret = 0; + + if (NFS_STALE(inode)) + ret = -ESTALE; + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode)) + ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); @@ -1321,6 +699,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); } + return ret; } /** @@ -1360,12 +739,6 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 - && nfsi->change_attr == fattr->pre_change_attr) { - nfsi->change_attr = fattr->change_attr; - nfsi->cache_change_attribute = jiffies; - } - /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { @@ -1399,9 +772,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat int data_unstable; - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - /* Has the inode gone and changed behind our back? */ if (nfsi->fileid != fattr->fileid || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { @@ -1414,20 +784,13 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { - if (nfsi->change_attr == fattr->change_attr) - goto out; - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); @@ -1444,7 +807,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; -out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1470,7 +832,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; if (time_after(fattr->time_start, nfsi->last_updated)) status = nfs_update_inode(inode, fattr); else @@ -1495,7 +856,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_lock(&inode->i_lock); if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; goto out; } status = nfs_update_inode(inode, fattr); @@ -1518,6 +879,7 @@ out: */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { + struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; unsigned int invalid = 0; @@ -1527,9 +889,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - if (nfsi->fileid != fattr->fileid) goto out_fileid; @@ -1539,6 +898,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; + server = NFS_SERVER(inode); + /* Update the fsid if and only if this is the root directory */ + if (inode == inode->i_sb->s_root->d_inode + && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + server->fsid = fattr->fsid; + /* * Update the read time so we don't revalidate too often. */ @@ -1548,7 +913,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Are we racing with known updates of the metadata on the server? */ data_stable = nfs_verify_change_attribute(inode, fattr->time_start); if (data_stable) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -1612,15 +977,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blksize = fattr->du.nfs2.blocksize; } - if ((fattr->valid & NFS_ATTR_FATTR_V4)) { - if (nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } else - invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; } /* Update attrtimeo value if we're out of the unstable period */ @@ -1668,190 +1031,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_err; } -/* - * File system information - */ - -static int nfs_set_super(struct super_block *s, void *data) -{ - s->s_fs_info = data; - return set_anon_super(s, data); -} - -static int nfs_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) - return 0; - if (old->addr.sin_port != server->addr.sin_port) - return 0; - return !nfs_compare_fh(&old->fh, &server->fh); -} - -static struct super_block *nfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - int error; - struct nfs_server *server = NULL; - struct super_block *s; - struct nfs_fh *root; - struct nfs_mount_data *data = raw_data; - - s = ERR_PTR(-EINVAL); - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err; - } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err; - } - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - goto out_err; - } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - goto out_err; - } - case 5: - memset(data->context, 0, sizeof(data->context)); - } -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - s = ERR_PTR(-EPROTONOSUPPORT); - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err; - } -#endif /* CONFIG_NFS_V3 */ - - s = ERR_PTR(-ENOMEM); - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - goto out_err; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - s = ERR_PTR(-EINVAL); - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - - /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; - } - - /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); - goto out_err; - } - - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - if (IS_ERR(s) || s->s_root) - goto out_rpciod_down; - - s->s_flags = flags; - - error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return s; -out_rpciod_down: - rpciod_down(); -out_err: - kfree(server); - return s; -} - -static void nfs_kill_super(struct super_block *s) -{ - struct nfs_server *server = NFS_SB(s); - - kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - rpciod_down(); /* release rpciod */ - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); -} - -static struct file_system_type nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .get_sb = nfs_get_sb, - .kill_sb = nfs_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; #ifdef CONFIG_NFS_V4 -static void nfs4_clear_inode(struct inode *); - - -static struct super_operations nfs4_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .delete_inode = nfs_delete_inode, - .statfs = nfs_statfs, - .clear_inode = nfs4_clear_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_stats = nfs_show_stats, -}; - /* * Clean out any remaining NFSv4 state that might be left over due * to open() calls that passed nfs_atomic_lookup, but failed to call * nfs_open(). */ -static void nfs4_clear_inode(struct inode *inode) +void nfs4_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1875,357 +1063,9 @@ static void nfs4_clear_inode(struct inode *inode) nfs4_close_state(state, state->state); } } - - -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) -{ - struct nfs_server *server; - struct nfs4_client *clp = NULL; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - struct rpc_timeout timeparms; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - server->rpc_ops = &nfs_v4_clientops; - - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - clp = nfs4_get_client(&server->addr.sin_addr); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return -EIO; - } - - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } - - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(data->proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); - if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clp->cl_rpcclient = clnt; - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); - } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs4_state = clp; - up_write(&clp->cl_sem); - clp = NULL; - - if (IS_ERR(clnt)) { - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - return err; - } - - server->client = clnt; - - if (server->nfs4_state->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return -ENOMEM; - } - - if (clnt->cl_auth->au_flavor != authflavour) { - struct rpc_auth *auth; - - auth = rpcauth_create(authflavour, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return PTR_ERR(auth); - } - } - - sb->s_time_gran = 1; - - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - if (err == 0) - return 0; -out_fail: - if (clp) - nfs4_put_client(clp); - return err; -} - -static int nfs4_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; -} - -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) -{ - void *p = NULL; - - if (!src->len) - return ERR_PTR(-EINVAL); - if (src->len < maxlen) - maxlen = src->len; - if (dst == NULL) { - p = dst = kmalloc(maxlen + 1, GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); - } - if (copy_from_user(dst, src->data, maxlen)) { - kfree(p); - return ERR_PTR(-EFAULT); - } - dst[maxlen] = '\0'; - return dst; -} - -static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - int error; - struct nfs_server *server; - struct super_block *s; - struct nfs4_mount_data *data = raw_data; - void *p; - - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return ERR_PTR(-EINVAL); - } - if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return ERR_PTR(-EINVAL); - } - - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return ERR_PTR(-ENOMEM); - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - p = nfs_copy_user_string(NULL, &data->hostname, 256); - if (IS_ERR(p)) - goto out_err; - server->hostname = p; - - p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); - if (IS_ERR(p)) - goto out_err; - server->mnt_path = p; - - p = nfs_copy_user_string(server->ip_addr, &data->client_addr, - sizeof(server->ip_addr) - 1); - if (IS_ERR(p)) - goto out_err; - - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - s = ERR_PTR(-EINVAL); - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - s = ERR_PTR(-EFAULT); - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - s = ERR_PTR(-EINVAL); - goto out_free; - } - - /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); - goto out_free; - } - - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) - goto out_free; - - s->s_flags = flags; - - error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return s; -out_err: - s = (struct super_block *)p; -out_free: - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); - return s; -} - -static void nfs4_kill_super(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - nfs_return_all_delegations(sb); - kill_anon_super(sb); - - nfs4_renewd_prepare_shutdown(server); - - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); - - destroy_nfsv4_state(server); - - rpciod_down(); - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); -} - -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .get_sb = nfs4_get_sb, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; -static int param_set_port(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) - return -EINVAL; - *((int *)kp->arg) = num; - return 0; -} - -module_param_call(callback_tcpport, param_set_port, param_get_int, - &nfs_callback_set_tcpport, 0644); - -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); - -#define nfs4_init_once(nfsi) \ - do { \ - INIT_LIST_HEAD(&(nfsi)->open_states); \ - nfsi->delegation = NULL; \ - nfsi->delegation_state = 0; \ - init_rwsem(&nfsi->rwsem); \ - } while(0) - -static inline int register_nfs4fs(void) -{ - int ret; - - ret = nfs_register_sysctl(); - if (ret != 0) - return ret; - ret = register_filesystem(&nfs4_fs_type); - if (ret != 0) - nfs_unregister_sysctl(); - return ret; -} - -static inline void unregister_nfs4fs(void) -{ - unregister_filesystem(&nfs4_fs_type); - nfs_unregister_sysctl(); -} -#else -#define nfs4_init_once(nfsi) \ - do { } while (0) -#define register_nfs4fs() (0) -#define unregister_nfs4fs() #endif -extern int nfs_init_nfspagecache(void); -extern void nfs_destroy_nfspagecache(void); -extern int nfs_init_readpagecache(void); -extern void nfs_destroy_readpagecache(void); -extern int nfs_init_writepagecache(void); -extern void nfs_destroy_writepagecache(void); -#ifdef CONFIG_NFS_DIRECTIO -extern int nfs_init_directcache(void); -extern void nfs_destroy_directcache(void); -#endif - -static kmem_cache_t * nfs_inode_cachep; - -static struct inode *nfs_alloc_inode(struct super_block *sb) +struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); @@ -2244,11 +1084,21 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -static void nfs_destroy_inode(struct inode *inode) +void nfs_destroy_inode(struct inode *inode) { kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +static inline void nfs4_init_once(struct nfs_inode *nfsi) +{ +#ifdef CONFIG_NFS_V4 + INIT_LIST_HEAD(&nfsi->open_states); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + init_rwsem(&nfsi->rwsem); +#endif +} + static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; @@ -2269,7 +1119,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) } } -static int nfs_init_inodecache(void) +static int __init nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), @@ -2282,7 +1132,7 @@ static int nfs_init_inodecache(void) return 0; } -static void nfs_destroy_inodecache(void) +static void __exit nfs_destroy_inodecache(void) { if (kmem_cache_destroy(nfs_inode_cachep)) printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n"); @@ -2311,29 +1161,22 @@ static int __init init_nfs_fs(void) if (err) goto out1; -#ifdef CONFIG_NFS_DIRECTIO err = nfs_init_directcache(); if (err) goto out0; -#endif #ifdef CONFIG_PROC_FS rpc_proc_register(&nfs_rpcstat); #endif - err = register_filesystem(&nfs_fs_type); - if (err) - goto out; - if ((err = register_nfs4fs()) != 0) + if ((err = register_nfs_fs()) != 0) goto out; return 0; out: #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif -#ifdef CONFIG_NFS_DIRECTIO nfs_destroy_directcache(); out0: -#endif nfs_destroy_writepagecache(); out1: nfs_destroy_readpagecache(); @@ -2347,9 +1190,7 @@ out4: static void __exit exit_nfs_fs(void) { -#ifdef CONFIG_NFS_DIRECTIO nfs_destroy_directcache(); -#endif nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); nfs_destroy_inodecache(); @@ -2357,8 +1198,7 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif - unregister_filesystem(&nfs_fs_type); - unregister_nfs4fs(); + unregister_nfs_fs(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h new file mode 100644 index 000000000000..bd2815e2dec1 --- /dev/null +++ b/fs/nfs/internal.h @@ -0,0 +1,186 @@ +/* + * NFS internal definitions + */ + +#include <linux/mount.h> + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; + char *hostname; + char *mnt_path; + struct sockaddr_in *addr; + rpc_authflavor_t authflavor; +}; + +/* namespace-nfs4.c */ +#ifdef CONFIG_NFS_V4 +extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); +#else +static inline +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + return ERR_PTR(-ENOENT); +} +#endif + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +/* pagelist.c */ +extern int __init nfs_init_nfspagecache(void); +extern void __exit nfs_destroy_nfspagecache(void); +extern int __init nfs_init_readpagecache(void); +extern void __exit nfs_destroy_readpagecache(void); +extern int __init nfs_init_writepagecache(void); +extern void __exit nfs_destroy_writepagecache(void); + +#ifdef CONFIG_NFS_DIRECTIO +extern int __init nfs_init_directcache(void); +extern void __exit nfs_destroy_directcache(void); +#else +#define nfs_init_directcache() (0) +#define nfs_destroy_directcache() do {} while(0) +#endif + +/* nfs2xdr.c */ +extern struct rpc_procinfo nfs_procedures[]; +extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); + +/* nfs3xdr.c */ +extern struct rpc_procinfo nfs3_procedures[]; +extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); + +/* nfs4xdr.c */ +extern int nfs_stat_to_errno(int); +extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); + +/* nfs4proc.c */ +#ifdef CONFIG_NFS_V4 +extern struct rpc_procinfo nfs4_procedures[]; + +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs4_fs_locations *fs_locations, + struct page *page); +#endif + +/* inode.c */ +extern struct inode *nfs_alloc_inode(struct super_block *sb); +extern void nfs_destroy_inode(struct inode *); +extern int nfs_write_inode(struct inode *,int); +extern void nfs_clear_inode(struct inode *); +#ifdef CONFIG_NFS_V4 +extern void nfs4_clear_inode(struct inode *); +#endif + +/* super.c */ +extern struct file_system_type nfs_referral_nfs4_fs_type; +extern struct file_system_type clone_nfs_fs_type; +#ifdef CONFIG_NFS_V4 +extern struct file_system_type clone_nfs4_fs_type; +#endif +#ifdef CONFIG_PROC_FS +extern struct rpc_stat nfs_rpcstat; +#endif +extern int __init register_nfs_fs(void); +extern void __exit unregister_nfs_fs(void); + +/* namespace.c */ +extern char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen); + +/* + * Determine the mount path as a string + */ +static inline char * +nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +{ +#ifdef CONFIG_NFS_V4 + return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); +#else + return NULL; +#endif +} + +/* + * Determine the device name as a string + */ +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); +} + +/* + * Determine the actual block size (and log2 thereof) + */ +static inline +unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) +{ + /* make sure blocksize is a power of two */ + if ((bsize & (bsize - 1)) || nrbitsp) { + unsigned char nrbits; + + for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + ; + bsize = 1 << nrbits; + if (nrbitsp) + *nrbitsp = nrbits; + } + + return bsize; +} + +/* + * Calculate the number of 512byte blocks used. + */ +static inline unsigned long nfs_calc_block_size(u64 tsize) +{ + loff_t used = (tsize + 511) >> 9; + return (used > ULONG_MAX) ? ULONG_MAX : used; +} + +/* + * Compute and set NFS server blocksize + */ +static inline +unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) +{ + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; + + return nfs_block_bits(bsize, nrbitsp); +} + +/* + * Determine the maximum file size for a superblock + */ +static inline +void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + +/* + * Check if the string represents a "valid" IPv4 address + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c new file mode 100644 index 000000000000..19b98ca468eb --- /dev/null +++ b/fs/nfs/namespace.c @@ -0,0 +1,229 @@ +/* + * linux/fs/nfs/namespace.c + * + * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> + * + * NFS namespace + */ + +#include <linux/config.h> + +#include <linux/dcache.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/nfs_fs.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/vfs.h> +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +static void nfs_expire_automounts(void *list); + +LIST_HEAD(nfs_automount_list); +static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); +int nfs_mountpoint_expiry_timeout = 500 * HZ; + +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper function for constructing the path from the + * root dentry to an arbitrary hashed dentry. + * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. + */ +char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry)) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +/* + * nfs_follow_mountpoint - handle crossing a mountpoint on the server + * @dentry - dentry of mountpoint + * @nd - nameidata info + * + * When we encounter a mountpoint on the server, we want to set up + * a mountpoint on the client too, to prevent inode numbers from + * colliding, and to allow "df" to work properly. + * On NFSv4, we also want to allow for the fact that different + * filesystems may be migrated to different servers in a failover + * situation, and that different filesystems may want to use + * different security flavours. + */ +static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +{ + struct vfsmount *mnt; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct dentry *parent; + struct nfs_fh fh; + struct nfs_fattr fattr; + int err; + + BUG_ON(IS_ROOT(dentry)); + dprintk("%s: enter\n", __FUNCTION__); + dput(nd->dentry); + nd->dentry = dget(dentry); + if (d_mountpoint(nd->dentry)) + goto out_follow; + /* Look it up again */ + parent = dget_parent(nd->dentry); + err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + dput(parent); + if (err != 0) + goto out_err; + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) + mnt = nfs_do_refmount(nd->mnt, nd->dentry); + else + mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + err = PTR_ERR(mnt); + if (IS_ERR(mnt)) + goto out_err; + + mntget(mnt); + err = do_add_mount(mnt, nd, nd->mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; + } + mntput(nd->mnt); + dput(nd->dentry); + nd->mnt = mnt; + nd->dentry = dget(mnt->mnt_root); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +out: + dprintk("%s: done, returned %d\n", __FUNCTION__, err); + return ERR_PTR(err); +out_err: + path_release(nd); + goto out; +out_follow: + while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + ; + err = 0; + goto out; +} + +struct inode_operations nfs_mountpoint_inode_operations = { + .follow_link = nfs_follow_mountpoint, + .getattr = nfs_getattr, +}; + +struct inode_operations nfs_referral_inode_operations = { + .follow_link = nfs_follow_mountpoint, +}; + +static void nfs_expire_automounts(void *data) +{ + struct list_head *list = (struct list_head *)data; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +} + +void nfs_release_automount_timer(void) +{ + if (list_empty(&nfs_automount_list)) { + cancel_delayed_work(&nfs_automount_task); + flush_scheduled_work(); + } +} + +/* + * Clone a mountpoint of the appropriate type + */ +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, + struct nfs_clone_mount *mountdata) +{ +#ifdef CONFIG_NFS_V4 + struct vfsmount *mnt = NULL; + switch (server->rpc_ops->version) { + case 2: + case 3: + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + break; + case 4: + mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + } + return mnt; +#else + return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); +#endif +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name); + if (page == NULL) + goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + mnt = (struct vfsmount *)devname; + if (IS_ERR(devname)) + goto free_page; + mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); +free_page: + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index f0015fa876e1..67391eef6b93 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -23,12 +23,11 @@ #include <linux/nfs.h> #include <linux/nfs2.h> #include <linux/nfs_fs.h> +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR /* #define NFS_PARANOIA 1 */ -extern int nfs_stat_to_errno(int stat); - /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO @@ -131,7 +130,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) fattr->du.nfs2.blocksize = ntohl(*p++); rdev = ntohl(*p++); fattr->du.nfs2.blocks = ntohl(*p++); - fattr->fsid_u.nfs3 = ntohl(*p++); + fattr->fsid.major = ntohl(*p++); + fattr->fsid.minor = 0; fattr->fileid = ntohl(*p++); p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 33287879bd23..7322da4d2055 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -172,8 +172,10 @@ static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, inode->i_ino, acl, dfacl); spin_lock(&inode->i_lock); __nfs3_forget_cached_acls(NFS_I(inode)); - nfsi->acl_access = posix_acl_dup(acl); - nfsi->acl_default = posix_acl_dup(dfacl); + if (!IS_ERR(acl)) + nfsi->acl_access = posix_acl_dup(acl); + if (!IS_ERR(dfacl)) + nfsi->acl_default = posix_acl_dup(dfacl); spin_unlock(&inode->i_lock); } @@ -254,7 +256,9 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) res.acl_access = NULL; } } - nfs3_cache_acls(inode, res.acl_access, res.acl_default); + nfs3_cache_acls(inode, + (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL), + (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL)); switch(type) { case ACL_TYPE_ACCESS: @@ -329,6 +333,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, switch (status) { case 0: status = nfs_refresh_inode(inode, &fattr); + nfs3_cache_acls(inode, acl, dfacl); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cf186f0d2b3b..7143b1f82cea 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -20,11 +20,10 @@ #include <linux/nfs_mount.h> #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC -extern struct rpc_procinfo nfs3_procedures[]; - /* A wrapper to handle the EJUKEBOX error message */ static int nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) @@ -809,8 +808,6 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); - static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index ec233619687e..0250269e9753 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -22,14 +22,13 @@ #include <linux/nfs3.h> #include <linux/nfs_fs.h> #include <linux/nfsacl.h> +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -extern int nfs_stat_to_errno(int); - /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words @@ -166,7 +165,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) fattr->rdev = 0; - p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); + p = xdr_decode_hyper(p, &fattr->fsid.major); + fattr->fsid.minor = 0; p = xdr_decode_hyper(p, &fattr->fileid); p = xdr_decode_time3(p, &fattr->atime); p = xdr_decode_time3(p, &fattr->mtime); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0f5e4e7cddec..9a102860df37 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -217,6 +217,9 @@ extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); +extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs4_fs_locations *fs_locations, struct page *page); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; @@ -225,6 +228,7 @@ extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[2]; +extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c new file mode 100644 index 000000000000..ea38d27b74e6 --- /dev/null +++ b/fs/nfs/nfs4namespace.c @@ -0,0 +1,201 @@ +/* + * linux/fs/nfs/nfs4namespace.c + * + * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> + * + * NFSv4 namespace + */ + +#include <linux/config.h> + +#include <linux/dcache.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/nfs_fs.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/vfs.h> +#include <linux/inet.h> +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +/* + * Check if fs_root is valid + */ +static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, + char *buffer, ssize_t buflen) +{ + char *end = buffer + buflen; + int n; + + *--end = '\0'; + buflen--; + + n = pathname->ncomponents; + while (--n >= 0) { + struct nfs4_string *component = &pathname->components[n]; + buflen -= component->len + 1; + if (buflen < 0) + goto Elong; + end -= component->len; + memcpy(end, component->data, component->len); + *--end = '/'; + } + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + + +/** + * nfs_follow_referral - set up mountpoint when hitting a referral on moved error + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fspath - fs path returned in fs_locations + * @mntpath - mount path to new server + * @hostname - hostname of new server + * @addr - host addr of new server + * + */ +static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs4_fs_locations *locations) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, + }; + char *page, *page2; + char *path, *fs_path; + char *devname; + int loc, s; + + if (locations == NULL || locations->nlocations <= 0) + goto out; + + dprintk("%s: referral at %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* Ensure fs path is a prefix of current dentry path */ + page = (char *) __get_free_page(GFP_USER); + if (page == NULL) + goto out; + page2 = (char *) __get_free_page(GFP_USER); + if (page2 == NULL) + goto out; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + goto out_free; + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + goto out_free; + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); + goto out_free; + } + + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(devname)) { + mnt = (struct vfsmount *)devname; + goto out_free; + } + + loc = 0; + while (loc < locations->nlocations && IS_ERR(mnt)) { + struct nfs4_fs_location *location = &locations->locations[loc]; + char *mnt_path; + + if (location == NULL || location->nservers <= 0 || + location->rootpath.ncomponents == 0) { + loc++; + continue; + } + + mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); + if (IS_ERR(mnt_path)) { + loc++; + continue; + } + mountdata.mnt_path = mnt_path; + + s = 0; + while (s < location->nservers) { + struct sockaddr_in addr = {}; + + if (location->servers[s].len <= 0 || + valid_ipaddr4(location->servers[s].data) < 0) { + s++; + continue; + } + + mountdata.hostname = location->servers[s].data; + addr.sin_addr.s_addr = in_aton(mountdata.hostname); + addr.sin_family = AF_INET; + addr.sin_port = htons(NFS_PORT); + mountdata.addr = &addr; + + mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + if (!IS_ERR(mnt)) { + break; + } + s++; + } + loc++; + } + +out_free: + free_page((unsigned long)page); + free_page((unsigned long)page2); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + +/* + * nfs_do_refmount - handle crossing a referral on server + * @dentry - dentry of referral + * @nd - nameidata info + * + */ +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct dentry *parent; + struct nfs4_fs_locations *fs_locations = NULL; + struct page *page; + int err; + + /* BUG_ON(IS_ROOT(dentry)); */ + dprintk("%s: enter\n", __FUNCTION__); + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + + fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (fs_locations == NULL) + goto out_free; + + /* Get locations */ + parent = dget_parent(dentry); + dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); + dput(parent); + if (err != 0 || fs_locations->nlocations <= 0 || + fs_locations->fs_path.ncomponents <= 0) + goto out_free; + + mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); +out_free: + __free_page(page); + kfree(fs_locations); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d86c0db7b1e8..b4916b092194 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -65,8 +65,6 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *) static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); -extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); -extern struct rpc_procinfo nfs4_procedures[]; /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -121,6 +119,25 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE 0 }; +const u32 nfs4_fs_locations_bitmap[2] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID + | FATTR4_WORD0_FS_LOCATIONS, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY + | FATTR4_WORD1_MOUNTED_ON_FILEID +}; + static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { @@ -185,15 +202,15 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp spin_unlock(&clp->cl_lock); } -static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo) +static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) { - struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_inode *nfsi = NFS_I(dir); - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + spin_lock(&dir->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; if (cinfo->before == nfsi->change_attr && cinfo->atomic) nfsi->change_attr = cinfo->after; - spin_unlock(&inode->i_lock); + spin_unlock(&dir->i_lock); } struct nfs4_opendata { @@ -1331,7 +1348,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f return status; } -static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_exception exception = { }; int err; @@ -1443,6 +1460,50 @@ out: return nfs4_map_errors(status); } +/* + * Get locations and (maybe) other attributes of a referral. + * Note that we'll actually follow the referral later when + * we detect fsid mismatch in inode revalidation + */ +static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) +{ + int status = -ENOMEM; + struct page *page = NULL; + struct nfs4_fs_locations *locations = NULL; + struct dentry dentry = {}; + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (locations == NULL) + goto out; + + dentry.d_name.name = name->name; + dentry.d_name.len = name->len; + status = nfs4_proc_fs_locations(dir, &dentry, locations, page); + if (status != 0) + goto out; + /* Make sure server returned a different fsid for the referral */ + if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { + dprintk("%s: server did not return a different fsid for a referral at %s\n", __FUNCTION__, name->name); + status = -EIO; + goto out; + } + + memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); + fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL; + if (!fattr->mode) + fattr->mode = S_IFDIR; + memset(fhandle, 0, sizeof(struct nfs_fh)); +out: + if (page) + __free_page(page); + if (locations) + kfree(locations); + return status; +} + static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs4_getattr_arg args = { @@ -1547,6 +1608,8 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, dprintk("NFS call lookup %s\n", name->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status == -NFS4ERR_MOVED) + status = nfs4_get_referral(dir, name, fattr, fhandle); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -2008,7 +2071,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * if (!status) { update_changeattr(dir, &res.cinfo); nfs_post_op_update_inode(dir, res.dir_attr); - nfs_refresh_inode(inode, res.fattr); + nfs_post_op_update_inode(inode, res.fattr); } return status; @@ -3570,6 +3633,36 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) return len; } +int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs4_fs_locations *fs_locations, struct page *page) +{ + struct nfs_server *server = NFS_SERVER(dir); + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + [1] = FATTR4_WORD1_MOUNTED_ON_FILEID, + }; + struct nfs4_fs_locations_arg args = { + .dir_fh = NFS_FH(dir), + .name = &dentry->d_name, + .page = page, + .bitmask = bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = fs_locations, + }; + int status; + + dprintk("%s: start\n", __FUNCTION__); + fs_locations->fattr.valid = 0; + fs_locations->server = server; + fs_locations->nlocations = 0; + status = rpc_call_sync(server->client, &msg, 0); + dprintk("%s: returned status = %d\n", __FUNCTION__, status); + return status; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7c5d70efe720..1750d996f49f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -411,6 +411,15 @@ static int nfs_stat_to_errno(int); #define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define NFS4_enc_fs_locations_sz \ + (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_fs_locations_sz \ + (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -722,6 +731,13 @@ static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) bitmask[1] & nfs4_fsinfo_bitmap[1]); } +static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask) +{ + return encode_getattr_two(xdr, + bitmask[0] & nfs4_fs_locations_bitmap[0], + bitmask[1] & nfs4_fs_locations_bitmap[1]); +} + static int encode_getfh(struct xdr_stream *xdr) { uint32_t *p; @@ -2003,6 +2019,38 @@ out: } /* + * Encode FS_LOCATIONS request + */ +static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + if ((status = encode_lookup(&xdr, args->name)) != 0) + goto out; + if ((status = encode_fs_locations(&xdr, args->bitmask)) != 0) + goto out; + /* set up reply + * toplevel_status + OP_PUTFH + status + * + OP_LOOKUP + status + OP_GETATTR + status = 7 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, + 0, PAGE_SIZE); +out: + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -2036,7 +2084,7 @@ out: } \ } while (0) -static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { uint32_t *p; @@ -2087,7 +2135,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) { uint32_t *p; - uint32_t strlen; + unsigned int strlen; char *str; READ_BUF(12); @@ -2217,7 +2265,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } -static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid) +static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) { uint32_t *p; @@ -2285,6 +2333,22 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t return 0; } +static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) +{ + uint32_t *p; + + *fileid = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { + READ_BUF(8); + READ64(*fileid); + bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + } + dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid); + return 0; +} + static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) { uint32_t *p; @@ -2336,6 +2400,116 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin return status; } +static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) +{ + int n; + uint32_t *p; + int status = 0; + + READ_BUF(4); + READ32(n); + if (n < 0) + goto out_eio; + if (n == 0) + goto root_path; + dprintk("path "); + path->ncomponents = 0; + while (path->ncomponents < n) { + struct nfs4_string *component = &path->components[path->ncomponents]; + status = decode_opaque_inline(xdr, &component->len, &component->data); + if (unlikely(status != 0)) + goto out_eio; + if (path->ncomponents != n) + dprintk("/"); + dprintk("%s", component->data); + if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) + path->ncomponents++; + else { + dprintk("cannot parse %d components in path\n", n); + goto out_eio; + } + } +out: + dprintk("\n"); + return status; +root_path: +/* a root pathname is sent as a zero component4 */ + path->ncomponents = 1; + path->components[0].len=0; + path->components[0].data=NULL; + dprintk("path /\n"); + goto out; +out_eio: + dprintk(" status %d", status); + status = -EIO; + goto out; +} + +static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) +{ + int n; + uint32_t *p; + int status = -EIO; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U))) + goto out; + status = 0; + if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) + goto out; + dprintk("%s: fsroot ", __FUNCTION__); + status = decode_pathname(xdr, &res->fs_path); + if (unlikely(status != 0)) + goto out; + READ_BUF(4); + READ32(n); + if (n <= 0) + goto out_eio; + res->nlocations = 0; + while (res->nlocations < n) { + int m; + struct nfs4_fs_location *loc = &res->locations[res->nlocations]; + + READ_BUF(4); + READ32(m); + if (m <= 0) + goto out_eio; + + loc->nservers = 0; + dprintk("%s: servers ", __FUNCTION__); + while (loc->nservers < m) { + struct nfs4_string *server = &loc->servers[loc->nservers]; + status = decode_opaque_inline(xdr, &server->len, &server->data); + if (unlikely(status != 0)) + goto out_eio; + dprintk("%s ", server->data); + if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) + loc->nservers++; + else { + int i; + dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); + for (i = loc->nservers; i < m; i++) { + int len; + char *data; + status = decode_opaque_inline(xdr, &len, &data); + if (unlikely(status != 0)) + goto out_eio; + } + } + } + status = decode_pathname(xdr, &loc->rootpath); + if (unlikely(status != 0)) + goto out_eio; + if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) + res->nlocations++; + } +out: + dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status); + return status; +out_eio: + status = -EIO; + goto out; +} + static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) { uint32_t *p; @@ -2841,6 +3015,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons bitmap[2] = {0}, type; int status, fmode = 0; + uint64_t fileid; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto xdr_error; @@ -2863,10 +3038,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) goto xdr_error; - if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0) + if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) goto xdr_error; if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) goto xdr_error; + if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + struct nfs4_fs_locations, + fattr))) != 0) + goto xdr_error; if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) goto xdr_error; fattr->mode |= fmode; @@ -2886,6 +3065,10 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) goto xdr_error; + if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0) + goto xdr_error; + if (fattr->fileid == 0 && fileid != 0) + fattr->fileid = fileid; if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; xdr_error: @@ -3350,8 +3533,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, attrlen, recvd); return -EINVAL; } - if (attrlen <= *acl_len) - xdr_read_pages(xdr, attrlen); + xdr_read_pages(xdr, attrlen); *acl_len = attrlen; } else status = -EOPNOTSUPP; @@ -4211,6 +4393,29 @@ out: return status; } +/* + * FS_LOCATIONS request + */ +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + xdr_enter_page(&xdr, PAGE_SIZE); + status = decode_getfattr(&xdr, &res->fattr, res->server); +out: + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; @@ -4382,6 +4587,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), PROC(GETACL, enc_getacl, dec_getacl), PROC(SETACL, enc_setacl, dec_setacl), + PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 106aca388ebc..ef9429643ebc 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -325,6 +325,7 @@ out: /** * nfs_scan_list - Scan a list for matching requests + * @nfsi: NFS inode * @head: One of the NFS inode request lists * @dst: Destination list * @idx_start: lower bound of page->index to scan @@ -336,14 +337,15 @@ out: * The requests are *not* checked to ensure that they form a contiguous set. * You must be holding the inode's req_lock when calling this function */ -int -nfs_scan_list(struct list_head *head, struct list_head *dst, - unsigned long idx_start, unsigned int npages) +int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, + struct list_head *dst, unsigned long idx_start, + unsigned int npages) { - struct list_head *pos, *tmp; - struct nfs_page *req; - unsigned long idx_end; - int res; + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; res = 0; if (npages == 0) @@ -351,25 +353,32 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, else idx_end = idx_start + npages - 1; - list_for_each_safe(pos, tmp, head) { - - req = nfs_list_entry(pos); - - if (req->wb_index < idx_start) - continue; - if (req->wb_index > idx_end) + for (;;) { + found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, + NFS_SCAN_MAXENTRIES); + if (found <= 0) break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + idx_start = req->wb_index + 1; + if (req->wb_list_head != head) + continue; + if (nfs_set_page_writeback_locked(req)) { + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } - if (!nfs_set_page_writeback_locked(req)) - continue; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - res++; } +out: return res; } -int nfs_init_nfspagecache(void) +int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", sizeof(struct nfs_page), @@ -381,7 +390,7 @@ int nfs_init_nfspagecache(void) return 0; } -void nfs_destroy_nfspagecache(void) +void __exit nfs_destroy_nfspagecache(void) { if (kmem_cache_destroy(nfs_page_cachep)) printk(KERN_INFO "nfs_page: not all structures were freed\n"); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 9dd85cac2df0..b3899ea3229e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -44,11 +44,10 @@ #include <linux/nfs_page.h> #include <linux/lockd/bind.h> #include <linux/smp_lock.h> +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC -extern struct rpc_procinfo nfs_procedures[]; - /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -611,8 +610,6 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); - static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { if (task->tk_status >= 0) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 624ca7146b6b..41c2ffee24f5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,14 +51,11 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_rdata_mempool); p = NULL; } @@ -104,6 +101,28 @@ int nfs_return_empty_page(struct page *page) return 0; } +static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) +{ + unsigned int remainder = data->args.count - data->res.count; + unsigned int base = data->args.pgbase + data->res.count; + unsigned int pglen; + struct page **pages; + + if (data->res.eof == 0 || remainder == 0) + return; + /* + * Note: "remainder" can never be negative, since we check for + * this in the XDR code. + */ + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + pglen = PAGE_CACHE_SIZE - base; + if (pglen < remainder) + memclear_highpage_flush(*pages, base, pglen); + else + memclear_highpage_flush(*pages, base, remainder); +} + /* * Read a page synchronously. */ @@ -177,11 +196,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); - if (count) - memclear_highpage_flush(page, rdata->args.pgbase, count); - SetPageUptodate(page); - if (PageError(page)) - ClearPageError(page); + nfs_readpage_truncate_uninitialised_page(rdata); + if (rdata->res.eof || rdata->res.count == rdata->args.count) + SetPageUptodate(page); result = 0; io_error: @@ -436,20 +453,12 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) struct nfs_page *req = data->req; struct page *page = req->wb_page; + if (likely(task->tk_status >= 0)) + nfs_readpage_truncate_uninitialised_page(data); + else + SetPageError(page); if (nfs_readpage_result(task, data) != 0) return; - if (task->tk_status >= 0) { - unsigned int request = data->args.count; - unsigned int result = data->res.count; - - if (result < request) { - memclear_highpage_flush(page, - data->args.pgbase + result, - request - result); - } - } else - SetPageError(page); - if (atomic_dec_and_test(&req->wb_complete)) { if (!PageError(page)) SetPageUptodate(page); @@ -462,6 +471,40 @@ static const struct rpc_call_ops nfs_read_partial_ops = { .rpc_release = nfs_readdata_release, }; +static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) +{ + unsigned int count = data->res.count; + unsigned int base = data->args.pgbase; + struct page **pages; + + if (unlikely(count == 0)) + return; + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageUptodate(*pages); + /* + * Was this an eof or a short read? If the latter, don't mark the page + * as uptodate yet. + */ + if (count > 0 && (data->res.eof || data->args.count == data->res.count)) + SetPageUptodate(*pages); +} + +static void nfs_readpage_set_pages_error(struct nfs_read_data *data) +{ + unsigned int count = data->args.count; + unsigned int base = data->args.pgbase; + struct page **pages; + + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageError(*pages); +} + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -469,27 +512,24 @@ static const struct rpc_call_ops nfs_read_partial_ops = { static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - unsigned int count = data->res.count; + /* + * Note: nfs_readpage_result may change the values of + * data->args. In the multi-page case, we therefore need + * to ensure that we call the next nfs_readpage_set_page_uptodate() + * first in the multi-page case. + */ + if (likely(task->tk_status >= 0)) { + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_set_pages_uptodate(data); + } else + nfs_readpage_set_pages_error(data); if (nfs_readpage_result(task, data) != 0) return; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); - struct page *page = req->wb_page; - nfs_list_remove_request(req); - if (task->tk_status >= 0) { - if (count < PAGE_CACHE_SIZE) { - if (count < req->wb_bytes) - memclear_highpage_flush(page, - req->wb_pgbase + count, - req->wb_bytes - count); - count = 0; - } else - count -= PAGE_CACHE_SIZE; - SetPageUptodate(page); - } else - SetPageError(page); + nfs_list_remove_request(req); nfs_readpage_release(req); } } @@ -654,7 +694,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, return ret; } -int nfs_init_readpagecache(void) +int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", sizeof(struct nfs_read_data), @@ -671,7 +711,7 @@ int nfs_init_readpagecache(void) return 0; } -void nfs_destroy_readpagecache(void) +void __exit nfs_destroy_readpagecache(void) { mempool_destroy(nfs_rdata_mempool); if (kmem_cache_destroy(nfs_rdata_cachep)) diff --git a/fs/nfs/super.c b/fs/nfs/super.c new file mode 100644 index 000000000000..e8a9bee74d9d --- /dev/null +++ b/fs/nfs/super.c @@ -0,0 +1,1537 @@ +/* + * linux/fs/nfs/super.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs superblock handling functions + * + * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * experimental NFS changes. Modularisation taken straight from SYS5 fs. + * + * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. + * J.S.Peatfield@damtp.cam.ac.uk + * + * Split from inode.c by David Howells <dhowells@redhat.com> + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/metrics.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/lockd/bind.h> +#include <linux/smp_lock.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/nfs_idmap.h> +#include <linux/vfs.h> +#include <linux/inet.h> +#include <linux/nfs_xdr.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. + */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + +/* + * RPC cruft for NFS + */ +static struct rpc_version * nfs_version[] = { + NULL, + NULL, + &nfs_version2, +#if defined(CONFIG_NFS_V3) + &nfs_version3, +#elif defined(CONFIG_NFS_V4) + NULL, +#endif +#if defined(CONFIG_NFS_V4) + &nfs_version4, +#endif +}; + +static struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +static void nfs_umount_begin(struct vfsmount *, int); +static int nfs_statfs(struct dentry *, struct kstatfs *); +static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); +static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); +static int nfs_clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs_kill_super(struct super_block *); + +static struct file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_get_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type clone_nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_clone_nfs_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct super_operations nfs_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .statfs = nfs_statfs, + .clear_inode = nfs_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_stats = nfs_show_stats, +}; + +#ifdef CONFIG_NFS_V4 +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs4_kill_super(struct super_block *sb); + +static struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type clone_nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs_clone_nfs4_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs_referral_nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs_referral_nfs4_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_stats = nfs_show_stats, +}; +#endif + +#ifdef CONFIG_NFS_V4 +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; + +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); +#endif + +#ifdef CONFIG_NFS_V4 +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); +#endif + +/* + * Register the NFS filesystems + */ +int __init register_nfs_fs(void) +{ + int ret; + + ret = register_filesystem(&nfs_fs_type); + if (ret < 0) + goto error_0; + +#ifdef CONFIG_NFS_V4 + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_1; + ret = register_filesystem(&nfs4_fs_type); + if (ret < 0) + goto error_2; +#endif + return 0; + +#ifdef CONFIG_NFS_V4 +error_2: + nfs_unregister_sysctl(); +error_1: + unregister_filesystem(&nfs_fs_type); +#endif +error_0: + return ret; +} + +/* + * Unregister the NFS filesystems + */ +void __exit unregister_nfs_fs(void) +{ +#ifdef CONFIG_NFS_V4 + unregister_filesystem(&nfs4_fs_type); + nfs_unregister_sysctl(); +#endif + unregister_filesystem(&nfs_fs_type); +} + +/* + * Deliver file system statistics to userspace + */ +static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct nfs_server *server = NFS_SB(sb); + unsigned char blockbits; + unsigned long blockres; + struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); + struct nfs_fattr fattr; + struct nfs_fsstat res = { + .fattr = &fattr, + }; + int error; + + lock_kernel(); + + error = server->rpc_ops->statfs(server, rootfh, &res); + buf->f_type = NFS_SUPER_MAGIC; + if (error < 0) + goto out_err; + + /* + * Current versions of glibc do not correctly handle the + * case where f_frsize != f_bsize. Eventually we want to + * report the value of wtmult in this field. + */ + buf->f_frsize = sb->s_blocksize; + + /* + * On most *nix systems, f_blocks, f_bfree, and f_bavail + * are reported in units of f_frsize. Linux hasn't had + * an f_frsize field in its statfs struct until recently, + * thus historically Linux's sys_statfs reports these + * fields in units of f_bsize. + */ + buf->f_bsize = sb->s_blocksize; + blockbits = sb->s_blocksize_bits; + blockres = (1 << blockbits) - 1; + buf->f_blocks = (res.tbytes + blockres) >> blockbits; + buf->f_bfree = (res.fbytes + blockres) >> blockbits; + buf->f_bavail = (res.abytes + blockres) >> blockbits; + + buf->f_files = res.tfiles; + buf->f_ffree = res.afiles; + + buf->f_namelen = server->namelen; + out: + unlock_kernel(); + return 0; + + out_err: + dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); + buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; + goto out; + +} + +static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) +{ + static struct { + rpc_authflavor_t flavour; + const char *str; + } sec_flavours[] = { + { RPC_AUTH_NULL, "null" }, + { RPC_AUTH_UNIX, "sys" }, + { RPC_AUTH_GSS_KRB5, "krb5" }, + { RPC_AUTH_GSS_KRB5I, "krb5i" }, + { RPC_AUTH_GSS_KRB5P, "krb5p" }, + { RPC_AUTH_GSS_LKEY, "lkey" }, + { RPC_AUTH_GSS_LKEYI, "lkeyi" }, + { RPC_AUTH_GSS_LKEYP, "lkeyp" }, + { RPC_AUTH_GSS_SPKM, "spkm" }, + { RPC_AUTH_GSS_SPKMI, "spkmi" }, + { RPC_AUTH_GSS_SPKMP, "spkmp" }, + { -1, "unknown" } + }; + int i; + + for (i=0; sec_flavours[i].flavour != -1; i++) { + if (sec_flavours[i].flavour == flavour) + break; + } + return sec_flavours[i].str; +} + +/* + * Describe the mount options in force on this server representation + */ +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) +{ + static struct proc_nfs_info { + int flag; + char *str; + char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, + { 0, NULL, NULL } + }; + struct proc_nfs_info *nfs_infop; + char buf[12]; + char *proto; + + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); + seq_printf(m, ",rsize=%d", nfss->rsize); + seq_printf(m, ",wsize=%d", nfss->wsize); + if (nfss->acregmin != 3*HZ || showdefaults) + seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + if (nfss->acregmax != 60*HZ || showdefaults) + seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + if (nfss->acdirmin != 30*HZ || showdefaults) + seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + if (nfss->acdirmax != 60*HZ || showdefaults) + seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); + } + switch (nfss->client->cl_xprt->prot) { + case IPPROTO_TCP: + proto = "tcp"; + break; + case IPPROTO_UDP: + proto = "udp"; + break; + default: + snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); + proto = buf; + } + seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); +} + +/* + * Describe the mount options on this VFS mountpoint + */ +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + + seq_puts(m, ",addr="); + seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +/* + * Present statistical information for this VFS mountpoint + */ +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for_each_possible_cpu(cpu) { + struct nfs_iostats *stats; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); + + return 0; +} + +/* + * Begin unmount by attempting to remove all automounted mountpoints we added + * in response to traversals + */ +static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) +{ + struct nfs_server *server; + struct rpc_clnt *rpc; + + shrink_submounts(vfsmnt, &nfs_automount_list); + if (!(flags & MNT_FORCE)) + return; + /* -EIO all pending I/O */ + server = NFS_SB(vfsmnt->mnt_sb); + rpc = server->client; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); +} + +/* + * Obtain the root inode of the file system. + */ +static struct inode * +nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +{ + struct nfs_server *server = NFS_SB(sb); + int error; + + error = server->rpc_ops->getroot(server, rootfh, fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + server->fsid = fsinfo->fattr->fsid; + return nfs_fhget(sb, rootfh, fsinfo->fattr); +} + +/* + * Do NFS version-independent mount processing, and sanity checking + */ +static int +nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) +{ + struct nfs_server *server; + struct inode *root_inode; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { + .fattr = &fattr, + }; + struct nfs_pathconf pathinfo = { + .fattr = &fattr, + }; + int no_root_error = 0; + unsigned long max_rpc_payload; + + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + + server = NFS_SB(sb); + + sb->s_magic = NFS_SUPER_MAGIC; + + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + return -ENOMEM; + + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); + /* Did getting the root inode fail? */ + if (IS_ERR(root_inode)) { + no_root_error = PTR_ERR(root_inode); + goto out_no_root; + } + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) { + no_root_error = -ENOMEM; + goto out_no_root; + } + sb->s_root->d_op = server->rpc_ops->dentry_ops; + + /* mount time stamp, in seconds */ + server->mount_time = jiffies; + + /* Get some general file system info */ + if (server->namelen == 0 && + server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + + if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) + server->rsize = nfs_block_size(fsinfo.rtmax, NULL); + if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) + server->wsize = nfs_block_size(fsinfo.wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + sb->s_flags |= MS_SYNCHRONOUS; + } + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + + server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; + server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + return 0; + /* Yargs. It didn't work out. */ +out_no_root: + dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); + if (!IS_ERR(root_inode)) + iput(root_inode); + return no_root_error; +} + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + +/* + * Create an RPC client handle. + */ +static struct rpc_clnt * +nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + + nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + + /* create transport and client */ + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (IS_ERR(xprt)) { + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + return (struct rpc_clnt *)xprt; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, data->pseudoflavor); + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + goto out_fail; + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + + return clnt; + +out_fail: + return clnt; +} + +/* + * Clone a server record + */ +static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_server *parent = NFS_SB(data->sb); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + void *err = ERR_PTR(-ENOMEM); + + sb->s_op = data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + goto out; + + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out; + + if (!IS_ERR(parent->client_sys)) { + server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out; + } + if (!IS_ERR(parent->client_acl)) { + server->client_acl = rpc_clone_client(parent->client_acl); + if (IS_ERR((err = server->client_acl))) + goto out; + } + root_inode = nfs_fhget(sb, data->fh, data->fattr); + if (!root_inode) + goto out; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return server; +out_put_root: + iput(root_inode); +out: + return err; +} + +/* + * Copy an existing superblock and attach revised data + */ +static int nfs_clone_generic_sb(struct nfs_clone_mount *data, + struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), + struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *), + struct vfsmount *mnt) +{ + struct nfs_server *server; + struct nfs_server *parent = NFS_SB(data->sb); + struct super_block *sb = ERR_PTR(-EINVAL); + char *hostname; + int error = -ENOMEM; + int len; + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (server == NULL) + goto out_err; + memcpy(server, parent, sizeof(*server)); + hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; + len = strlen(hostname) + 1; + server->hostname = kmalloc(len, GFP_KERNEL); + if (server->hostname == NULL) + goto free_server; + memcpy(server->hostname, hostname, len); + error = rpciod_up(); + if (error != 0) + goto free_hostname; + + sb = fill_sb(server, data); + if (IS_ERR(sb)) { + error = PTR_ERR(sb); + goto kill_rpciod; + } + + if (sb->s_root) + goto out_rpciod_down; + + server = fill_server(sb, data); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_deactivate; + } + return simple_set_mnt(mnt, sb); +out_deactivate: + up_write(&sb->s_umount); + deactivate_super(sb); + return error; +out_rpciod_down: + rpciod_down(); + kfree(server->hostname); + kfree(server); + return simple_set_mnt(mnt, sb); +kill_rpciod: + rpciod_down(); +free_hostname: + kfree(server->hostname); +free_server: + kfree(server); +out_err: + return error; +} + +/* + * Set up an NFS2/3 superblock + * + * The way this works is that the mount process passes a structure + * in the data argument which contains the server's IP address + * and the root file handle obtained from the server's mount + * daemon. We stash these away in the private superblock fields. + */ +static int +nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) +{ + struct nfs_server *server; + rpc_authflavor_t authflavor; + + server = NFS_SB(sb); + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + /* Start lockd here, before we might error out */ + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + + server->namelen = data->namlen; + server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); + if (!server->hostname) + return -ENOMEM; + strcpy(server->hostname, data->hostname); + + /* Check NFS protocol revision and initialize RPC op vector + * and file handle pool. */ +#ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { + server->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + server->rpc_ops = &nfs_v2_clientops; + } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif + + /* Fill in pseudoflavor for mount version < 5 */ + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; + authflavor = data->pseudoflavor; /* save for sb_init() */ + /* XXX maybe we want to add a server->pseudoflavor field */ + + /* Create RPC client handles */ + server->client = nfs_create_client(server, data); + if (IS_ERR(server->client)) + return PTR_ERR(server->client); + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + + server->client_sys = rpc_clone_client(server->client); + if (IS_ERR(server->client_sys)) + return PTR_ERR(server->client_sys); + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } + if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + sb->s_time_gran = 1; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + sb->s_op = &nfs_sops; + return nfs_sb_init(sb, authflavor); +} + +static int nfs_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + return set_anon_super(s, data); +} + +static int nfs_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data; + struct nfs_server *old = NFS_SB(sb); + + if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) + return 0; + if (old->addr.sin_port != server->addr.sin_port) + return 0; + return !nfs_compare_fh(&old->fh, &server->fh); +} + +static int nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + int error; + struct nfs_server *server = NULL; + struct super_block *s; + struct nfs_fh *root; + struct nfs_mount_data *data = raw_data; + + error = -EINVAL; + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err_noserver; + } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err_noserver; + } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err_noserver; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err_noserver; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + error = -EPROTONOSUPPORT; + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err_noserver; + } +#endif /* CONFIG_NFS_V3 */ + + error = -ENOMEM; + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + goto out_err_noserver; + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + + root = &server->fh; + if (data->flags & NFS_MOUNT_VER3) + root->size = data->root.size; + else + root->size = NFS2_FHSIZE; + error = -EINVAL; + if (root->size > sizeof(root->data)) { + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; + } + memcpy(root->data, data->root.data, root->size); + + /* We now require that the mount process passes the remote address */ + memcpy(&server->addr, &data->addr, sizeof(server->addr)); + if (server->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; + } + + /* Fire up rpciod if not yet running */ + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); + goto out_err; + } + + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_rpciod; + } + + if (s->s_root) + goto out_rpciod_down; + + s->s_flags = flags; + + error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + return simple_set_mnt(mnt, s); + +out_rpciod_down: + rpciod_down(); + kfree(server); + return simple_set_mnt(mnt, s); + +out_err_rpciod: + rpciod_down(); +out_err: + kfree(server); +out_err_noserver: + return error; +} + +static void nfs_kill_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + kill_anon_super(s); + + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + if (!IS_ERR(server->client_sys)) + rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ + + rpciod_down(); /* release rpciod */ + + nfs_free_iostats(server->io_stats); + kfree(server->hostname); + kfree(server); + nfs_release_automount_timer(); +} + +static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + struct super_block *sb; + + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + return sb; +} + +static int nfs_clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); +} + +#ifdef CONFIG_NFS_V4 +static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, + struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) +{ + struct nfs4_client *clp; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + int err = -EIO; + + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); + return ERR_PTR(err); + } + + /* Now create transport and client */ + down_write(&clp->cl_sem); + if (IS_ERR(clp->cl_rpcclient)) { + xprt = xprt_create_proto(proto, &server->addr, timeparms); + if (IS_ERR(xprt)) { + up_write(&clp->cl_sem); + err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + /* Bind to a reserved port! */ + xprt->resvport = 1; + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, flavor); + if (IS_ERR(clnt)) { + up_write(&clp->cl_sem); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clp->cl_rpcclient = clnt; + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + nfs_idmap_new(clp); + } + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); + if (!IS_ERR(clnt)) + server->nfs4_state = clp; + up_write(&clp->cl_sem); + clp = NULL; + + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + return clnt; + } + + if (server->nfs4_state->cl_idmap == NULL) { + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + + if (clnt->cl_auth->au_flavor != flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(flavor, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return (struct rpc_clnt *)auth; + } + } + return clnt; + + out_fail: + if (clp) + nfs4_put_client(clp); + return ERR_PTR(err); +} + +/* + * Set up an NFS4 superblock + */ +static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +{ + struct nfs_server *server; + struct rpc_timeout timeparms; + rpc_authflavor_t authflavour; + int err = -EIO; + + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + server = NFS_SB(sb); + if (data->rsize != 0) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize != 0) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps = NFS_CAP_ATOMIC_OPEN; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + server->rpc_ops = &nfs_v4_clientops; + + nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + + /* Now create transport and client */ + authflavour = RPC_AUTH_UNIX; + if (data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } + if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { + err = -EFAULT; + goto out_fail; + } + } + + server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); + if (IS_ERR(server->client)) { + err = PTR_ERR(server->client); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + + sb->s_time_gran = 1; + + sb->s_op = &nfs4_sops; + err = nfs_sb_init(sb, authflavour); + + out_fail: + return err; +} + +static int nfs4_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data; + struct nfs_server *old = NFS_SB(sb); + + if (strcmp(server->hostname, old->hostname) != 0) + return 0; + if (strcmp(server->mnt_path, old->mnt_path) != 0) + return 0; + return 1; +} + +static void * +nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +{ + void *p = NULL; + + if (!src->len) + return ERR_PTR(-EINVAL); + if (src->len < maxlen) + maxlen = src->len; + if (dst == NULL) { + p = dst = kmalloc(maxlen + 1, GFP_KERNEL); + if (p == NULL) + return ERR_PTR(-ENOMEM); + } + if (copy_from_user(dst, src->data, maxlen)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + dst[maxlen] = '\0'; + return dst; +} + +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + int error; + struct nfs_server *server; + struct super_block *s; + struct nfs4_mount_data *data = raw_data; + void *p; + + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return -EINVAL; + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + return -EINVAL; + } + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return -ENOMEM; + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + + p = nfs_copy_user_string(NULL, &data->hostname, 256); + if (IS_ERR(p)) + goto out_err; + server->hostname = p; + + p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); + if (IS_ERR(p)) + goto out_err; + server->mnt_path = p; + + p = nfs_copy_user_string(server->ip_addr, &data->client_addr, + sizeof(server->ip_addr) - 1); + if (IS_ERR(p)) + goto out_err; + + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(server->addr)) { + error = -EINVAL; + goto out_free; + } + if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { + error = -EFAULT; + goto out_free; + } + if (server->addr.sin_family != AF_INET || + server->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); + error = -EINVAL; + goto out_free; + } + + /* Fire up rpciod if not yet running */ + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); + goto out_free; + } + + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); + + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_free; + } + + if (s->s_root) { + kfree(server->mnt_path); + kfree(server->hostname); + kfree(server); + return simple_set_mnt(mnt, s); + } + + s->s_flags = flags; + + error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + return simple_set_mnt(mnt, s); +out_err: + error = PTR_ERR(p); +out_free: + kfree(server->mnt_path); + kfree(server->hostname); + kfree(server); + return error; +} + +static void nfs4_kill_super(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + nfs_return_all_delegations(sb); + kill_anon_super(sb); + + nfs4_renewd_prepare_shutdown(server); + + if (server->client != NULL && !IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + destroy_nfsv4_state(server); + + rpciod_down(); + + nfs_free_iostats(server->io_stats); + kfree(server->hostname); + kfree(server); + nfs_release_automount_timer(); +} + +/* + * Constructs the SERVER-side path + */ +static inline char *nfs4_dup_path(const struct dentry *dentry) +{ + char *page = (char *) __get_free_page(GFP_USER); + char *path; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (!IS_ERR(path)) { + int len = PAGE_SIZE + page - path; + char *tmp = path; + + path = kmalloc(len, GFP_KERNEL); + if (path) + memcpy(path, tmp, len); + else + path = ERR_PTR(-ENOMEM); + } + free_page((unsigned long)page); + return path; +} + +static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + const struct dentry *dentry = data->dentry; + struct nfs4_client *clp = server->nfs4_state; + struct super_block *sb; + + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + server->mnt_path = nfs4_dup_path(dentry); + if (IS_ERR(server->mnt_path)) { + sb = (struct super_block *)server->mnt_path; + goto err; + } + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + nfs4_server_capabilities(server, &server->fh); + + down_write(&clp->cl_sem); + atomic_inc(&clp->cl_count); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + +static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt); +} + +static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + struct super_block *sb = ERR_PTR(-ENOMEM); + int len; + + len = strlen(data->mnt_path) + 1; + server->mnt_path = kmalloc(len, GFP_KERNEL); + if (server->mnt_path == NULL) + goto err; + memcpy(server->mnt_path, data->mnt_path, len); + memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); + + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + +static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) +{ + struct nfs_server *server = NFS_SB(sb); + struct rpc_timeout timeparms; + int proto, timeo, retrans; + void *err; + + proto = IPPROTO_TCP; + /* Since we are following a referral and there may be alternatives, + set the timeouts and retries to low values */ + timeo = 2; + retrans = 1; + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + + server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); + if (IS_ERR((err = server->client))) + goto out_err; + + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); + if (!IS_ERR(err)) + return server; +out_err: + return (struct nfs_server *)err; +} + +static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt); +} + +#endif diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 18dc95b0b646..600bbe630abd 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -52,7 +52,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct page *page; - void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); + void *err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, @@ -75,22 +75,13 @@ read_failed: return NULL; } -static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) -{ - if (cookie) { - struct page *page = cookie; - kunmap(page); - page_cache_release(page); - } -} - /* * symlinks can't do much... */ struct inode_operations nfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = nfs_follow_link, - .put_link = nfs_put_link, + .put_link = page_put_link, .getattr = nfs_getattr, .setattr = nfs_setattr, }; diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 4c486eb867ca..db61e51bb154 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/nfs4.h> #include <linux/nfs_idmap.h> +#include <linux/nfs_fs.h> #include "callback.h" @@ -46,6 +47,15 @@ static ctl_table nfs_cb_sysctls[] = { .strategy = &sysctl_jiffies, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_mountpoint_timeout", + .data = &nfs_mountpoint_expiry_timeout, + .maxlen = sizeof(nfs_mountpoint_expiry_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, { .ctl_name = 0 } }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4cfada2cc09f..b383fdd3a15c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -98,11 +98,10 @@ struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kzalloc(size, GFP_NOFS); + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); if (!p->pagevec) { mempool_free(p, nfs_commit_mempool); p = NULL; @@ -126,14 +125,11 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_wdata_mempool); p = NULL; } @@ -583,6 +579,17 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un return ret; } +static void nfs_cancel_requests(struct list_head *head) +{ + struct nfs_page *req; + while(!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_inode_remove_request(req); + nfs_clear_page_writeback(req); + } +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -627,7 +634,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st int res = 0; if (nfsi->ncommit != 0) { - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); nfsi->ncommit -= res; if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); @@ -1495,15 +1502,25 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, pages = nfs_scan_dirty(inode, &head, idx_start, npages); if (pages != 0) { spin_unlock(&nfsi->req_lock); - ret = nfs_flush_list(inode, &head, pages, how); + if (how & FLUSH_INVALIDATE) + nfs_cancel_requests(&head); + else + ret = nfs_flush_list(inode, &head, pages, how); spin_lock(&nfsi->req_lock); continue; } if (nocommit) break; - pages = nfs_scan_commit(inode, &head, 0, 0); + pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; + if (how & FLUSH_INVALIDATE) { + spin_unlock(&nfsi->req_lock); + nfs_cancel_requests(&head); + spin_lock(&nfsi->req_lock); + continue; + } + pages += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); ret = nfs_commit_list(inode, &head, how); spin_lock(&nfsi->req_lock); @@ -1512,7 +1529,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, return ret; } -int nfs_init_writepagecache(void) +int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", sizeof(struct nfs_write_data), @@ -1534,7 +1551,7 @@ int nfs_init_writepagecache(void) return 0; } -void nfs_destroy_writepagecache(void) +void __exit nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); mempool_destroy(nfs_wdata_mempool); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index de3998f15f10..5446a0861d1d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1310,7 +1310,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { - status = vfs_statfs(dentry->d_inode->i_sb, &statfs); + status = vfs_statfs(dentry, &statfs); if (status) goto out_nfserr; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3ef017b3b5bd..a1810e6a93e5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -494,10 +494,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) return simple_fill_super(sb, 0x6e667364, nfsd_files); } -static struct super_block *nfsd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int nfsd_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, nfsd_fill_super); + return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); } static struct file_system_type nfsd_fs_type = { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1d65f13f458c..245eaa1fb59b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1737,7 +1737,7 @@ int nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) { int err = fh_verify(rqstp, fhp, 0, MAY_NOP); - if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat)) + if (!err && vfs_statfs(fhp->fh_dentry,stat)) err = nfserr_io; return err; } diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h index 3b74e66ca2ff..325ce261a107 100644 --- a/fs/ntfs/aops.h +++ b/fs/ntfs/aops.h @@ -86,8 +86,7 @@ static inline void ntfs_unmap_page(struct page *page) static inline struct page *ntfs_map_page(struct address_space *mapping, unsigned long index) { - struct page *page = read_cache_page(mapping, index, - (filler_t*)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, index, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 1663f5c3c6aa..6708e1d68a9e 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -2529,8 +2529,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) end >>= PAGE_CACHE_SHIFT; /* If there is a first partial page, need to do it the slow way. */ if (start_ofs) { - page = read_cache_page(mapping, idx, - (filler_t*)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, idx, NULL); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read first partial " "page (sync error, index 0x%lx).", idx); @@ -2600,8 +2599,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) } /* If there is a last partial page, need to do it the slow way. */ if (end_ofs) { - page = read_cache_page(mapping, idx, - (filler_t*)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, idx, NULL); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read last partial page " "(sync error, index 0x%lx).", idx); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index c63a83e8da98..2e42c2dcae12 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -231,8 +231,7 @@ do_non_resident_extend: * Read the page. If the page is not present, this will zero * the uninitialized regions for us. */ - page = read_cache_page(mapping, index, - (filler_t*)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, index, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto init_err_out; @@ -1359,7 +1358,7 @@ err_out: goto out; } -static size_t __ntfs_copy_from_user_iovec(char *vaddr, +static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr, const struct iovec *iov, size_t iov_ofs, size_t bytes) { size_t total = 0; @@ -1377,10 +1376,6 @@ static size_t __ntfs_copy_from_user_iovec(char *vaddr, bytes -= len; vaddr += len; if (unlikely(left)) { - /* - * Zero the rest of the target like __copy_from_user(). - */ - memset(vaddr, 0, bytes); total -= left; break; } @@ -1421,11 +1416,13 @@ static inline void ntfs_set_next_iovec(const struct iovec **iovp, * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s * single-segment behaviour. * - * We call the same helper (__ntfs_copy_from_user_iovec()) both when atomic and - * when not atomic. This is ok because __ntfs_copy_from_user_iovec() calls - * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In - * fact, the only difference between __copy_from_user_inatomic() and - * __copy_from_user() is that the latter calls might_sleep(). And on many + * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both + * when atomic and when not atomic. This is ok because + * __ntfs_copy_from_user_iovec_inatomic() calls __copy_from_user_inatomic() + * and it is ok to call this when non-atomic. + * Infact, the only difference between __copy_from_user_inatomic() and + * __copy_from_user() is that the latter calls might_sleep() and the former + * should not zero the tail of the buffer on error. And on many * architectures __copy_from_user_inatomic() is just defined to * __copy_from_user() so it makes no difference at all on those architectures. */ @@ -1442,14 +1439,18 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages, if (len > bytes) len = bytes; kaddr = kmap_atomic(*pages, KM_USER0); - copied = __ntfs_copy_from_user_iovec(kaddr + ofs, + copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs, *iov, *iov_ofs, len); kunmap_atomic(kaddr, KM_USER0); if (unlikely(copied != len)) { /* Do it the slow way. */ kaddr = kmap(*pages); - copied = __ntfs_copy_from_user_iovec(kaddr + ofs, + copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs, *iov, *iov_ofs, len); + /* + * Zero the rest of the target like __copy_from_user(). + */ + memset(kaddr + ofs + copied, 0, len - copied); kunmap(*pages); if (unlikely(copied != len)) goto err_out; @@ -1484,14 +1485,15 @@ static inline void ntfs_flush_dcache_pages(struct page **pages, unsigned nr_pages) { BUG_ON(!nr_pages); + /* + * Warning: Do not do the decrement at the same time as the call to + * flush_dcache_page() because it is a NULL macro on i386 and hence the + * decrement never happens so the loop never terminates. + */ do { - /* - * Warning: Do not do the decrement at the same time as the - * call because flush_dcache_page() is a NULL macro on i386 - * and hence the decrement never happens. - */ + --nr_pages; flush_dcache_page(pages[nr_pages]); - } while (--nr_pages > 0); + } while (nr_pages > 0); } /** diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 27833f6df49f..0e14acea3f8b 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2601,10 +2601,10 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, /** * ntfs_statfs - return information about mounted NTFS volume - * @sb: super block of mounted volume + * @dentry: dentry from mounted volume * @sfs: statfs structure in which to return the information * - * Return information about the mounted NTFS volume @sb in the statfs structure + * Return information about the mounted NTFS volume @dentry in the statfs structure * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is * called). We interpret the values to be correct of the moment in time at * which we are called. Most values are variable otherwise and this isn't just @@ -2617,8 +2617,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, * * Return 0 on success or -errno on error. */ -static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) +static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) { + struct super_block *sb = dentry->d_sb; s64 size; ntfs_volume *vol = NTFS_SB(sb); ntfs_inode *mft_ni = NTFS_I(vol->mft_ino); @@ -3093,10 +3094,11 @@ struct kmem_cache *ntfs_index_ctx_cache; /* Driver wide mutex. */ DEFINE_MUTEX(ntfs_lock); -static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ntfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, + mnt); } static struct file_system_type ntfs_fs_type = { diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 7e88e24b3471..7273d9fa6bab 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -574,10 +574,10 @@ static struct inode_operations dlmfs_file_inode_operations = { .getattr = simple_getattr, }; -static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int dlmfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super); + return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); } static struct file_system_type dlmfs_fs_type = { diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 949b3dac30f1..cdf73393f094 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -100,7 +100,7 @@ static int ocfs2_initialize_mem_caches(void); static void ocfs2_free_mem_caches(void); static void ocfs2_delete_osb(struct ocfs2_super *osb); -static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf); +static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf); static int ocfs2_sync_fs(struct super_block *sb, int wait); @@ -672,12 +672,14 @@ read_super_error: return status; } -static struct super_block *ocfs2_get_sb(struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) +static int ocfs2_get_sb(struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, + mnt); } static struct file_system_type ocfs2_fs_type = { @@ -855,7 +857,7 @@ static void ocfs2_put_super(struct super_block *sb) mlog_exit_void(); } -static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf) +static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ocfs2_super *osb; u32 numbits, freebits; @@ -864,9 +866,9 @@ static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf) struct buffer_head *bh = NULL; struct inode *inode = NULL; - mlog_entry("(%p, %p)\n", sb, buf); + mlog_entry("(%p, %p)\n", dentry->d_sb, buf); - osb = OCFS2_SB(sb); + osb = OCFS2_SB(dentry->d_sb); inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, @@ -889,7 +891,7 @@ static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf) freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used); buf->f_type = OCFS2_SUPER_MAGIC; - buf->f_bsize = sb->s_blocksize; + buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_namelen = OCFS2_MAX_FILENAME_LEN; buf->f_blocks = ((sector_t) numbits) * (osb->s_clustersize >> osb->sb->s_blocksize_bits); diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index f6986bd79e75..0c8a1294ec96 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -64,8 +64,7 @@ static char *ocfs2_page_getlink(struct dentry * dentry, { struct page * page; struct address_space *mapping = dentry->d_inode->i_mapping; - page = read_cache_page(mapping, 0, - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) goto sync_fail; wait_on_page_locked(page); diff --git a/fs/open.c b/fs/open.c index 4f178acd4c09..303f06d2a7b9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -31,18 +31,18 @@ #include <asm/unistd.h> -int vfs_statfs(struct super_block *sb, struct kstatfs *buf) +int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { int retval = -ENODEV; - if (sb) { + if (dentry) { retval = -ENOSYS; - if (sb->s_op->statfs) { + if (dentry->d_sb->s_op->statfs) { memset(buf, 0, sizeof(*buf)); - retval = security_sb_statfs(sb); + retval = security_sb_statfs(dentry); if (retval) return retval; - retval = sb->s_op->statfs(sb, buf); + retval = dentry->d_sb->s_op->statfs(dentry, buf); if (retval == 0 && buf->f_frsize == 0) buf->f_frsize = buf->f_bsize; } @@ -52,12 +52,12 @@ int vfs_statfs(struct super_block *sb, struct kstatfs *buf) EXPORT_SYMBOL(vfs_statfs); -static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) +static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) { struct kstatfs st; int retval; - retval = vfs_statfs(sb, &st); + retval = vfs_statfs(dentry, &st); if (retval) return retval; @@ -95,12 +95,12 @@ static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) return 0; } -static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf) +static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) { struct kstatfs st; int retval; - retval = vfs_statfs(sb, &st); + retval = vfs_statfs(dentry, &st); if (retval) return retval; @@ -130,7 +130,7 @@ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) error = user_path_walk(path, &nd); if (!error) { struct statfs tmp; - error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp); + error = vfs_statfs_native(nd.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; path_release(&nd); @@ -149,7 +149,7 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 error = user_path_walk(path, &nd); if (!error) { struct statfs64 tmp; - error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp); + error = vfs_statfs64(nd.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; path_release(&nd); @@ -168,7 +168,7 @@ asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) file = fget(fd); if (!file) goto out; - error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp); + error = vfs_statfs_native(file->f_dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; fput(file); @@ -189,7 +189,7 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user file = fget(fd); if (!file) goto out; - error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp); + error = vfs_statfs64(file->f_dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; fput(file); @@ -322,7 +322,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) - error = do_truncate(dentry, length, 0, file); + error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); out_putf: fput(file); out: @@ -1152,7 +1152,7 @@ int filp_close(struct file *filp, fl_owner_t id) } if (filp->f_op && filp->f_op->flush) - retval = filp->f_op->flush(filp); + retval = filp->f_op->flush(filp, id); dnotify_flush(filp, id); locks_remove_posix(filp, id); diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 0f14276a2e51..efc7c91128af 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -64,6 +64,11 @@ static int openpromfs_readdir(struct file *, void *, filldir_t); static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd); static int openpromfs_unlink (struct inode *, struct dentry *dentry); +static inline u16 ptr_nod(void *p) +{ + return (long)p & 0xFFFF; +} + static ssize_t nodenum_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -72,7 +77,7 @@ static ssize_t nodenum_read(struct file *file, char __user *buf, if (count < 0 || !inode->u.generic_ip) return -EINVAL; - sprintf (buffer, "%8.8x\n", (u32)(long)(inode->u.generic_ip)); + sprintf (buffer, "%8.8lx\n", (long)inode->u.generic_ip); if (file->f_pos >= 9) return 0; if (count > 9 - file->f_pos) @@ -95,9 +100,9 @@ static ssize_t property_read(struct file *filp, char __user *buf, char buffer[64]; if (!filp->private_data) { - node = nodes[(u16)((long)inode->u.generic_ip)].node; + node = nodes[ptr_nod(inode->u.generic_ip)].node; i = ((u32)(long)inode->u.generic_ip) >> 16; - if ((u16)((long)inode->u.generic_ip) == aliases) { + if (ptr_nod(inode->u.generic_ip) == aliases) { if (i >= aliases_nodes) p = NULL; else @@ -111,7 +116,7 @@ static ssize_t property_read(struct file *filp, char __user *buf, return -EIO; i = prom_getproplen (node, p); if (i < 0) { - if ((u16)((long)inode->u.generic_ip) == aliases) + if (ptr_nod(inode->u.generic_ip) == aliases) i = 0; else return -EIO; @@ -123,7 +128,7 @@ static ssize_t property_read(struct file *filp, char __user *buf, GFP_KERNEL); if (!filp->private_data) return -ENOMEM; - op = (openprom_property *)filp->private_data; + op = filp->private_data; op->flag = 0; op->alloclen = 2 * i; strcpy (op->name, p); @@ -163,7 +168,7 @@ static ssize_t property_read(struct file *filp, char __user *buf, op->len--; } } else - op = (openprom_property *)filp->private_data; + op = filp->private_data; if (!count || !(op->len || (op->flag & OPP_ASCIIZ))) return 0; if (*ppos >= 0xffffff || count >= 0xffffff) @@ -335,7 +340,7 @@ static ssize_t property_write(struct file *filp, const char __user *buf, return i; } k = *ppos; - op = (openprom_property *)filp->private_data; + op = filp->private_data; if (!(op->flag & OPP_STRING)) { u32 *first, *last; int first_off, last_cnt; @@ -388,13 +393,13 @@ static ssize_t property_write(struct file *filp, const char __user *buf, memcpy (b, filp->private_data, sizeof (openprom_property) + strlen (op->name) + op->alloclen); - memset (((char *)b) + sizeof (openprom_property) + memset (b + sizeof (openprom_property) + strlen (op->name) + op->alloclen, 0, 2 * i - op->alloclen); - op = (openprom_property *)b; + op = b; op->alloclen = 2*i; b = filp->private_data; - filp->private_data = (void *)op; + filp->private_data = op; kfree (b); } first = ((u32 *)op->value) + (k / 9); @@ -448,10 +453,11 @@ static ssize_t property_write(struct file *filp, const char __user *buf, *q |= simple_strtoul (tmp, NULL, 16); buf += last_cnt; } else { - char tchars[17]; /* XXX yuck... */ + char tchars[2 * sizeof(long) + 1]; - if (copy_from_user(tchars, buf, 16)) + if (copy_from_user(tchars, buf, sizeof(tchars) - 1)) return -EFAULT; + tchars[sizeof(tchars) - 1] = '\0'; *q = simple_strtoul (tchars, NULL, 16); buf += 9; } @@ -497,13 +503,13 @@ write_try_string: memcpy (b, filp->private_data, sizeof (openprom_property) + strlen (op->name) + op->alloclen); - memset (((char *)b) + sizeof (openprom_property) + memset (b + sizeof (openprom_property) + strlen (op->name) + op->alloclen, 0, 2*(count - *ppos) - op->alloclen); - op = (openprom_property *)b; + op = b; op->alloclen = 2*(count + *ppos); b = filp->private_data; - filp->private_data = (void *)op; + filp->private_data = op; kfree (b); } p = op->value + *ppos - ((op->flag & OPP_QUOTED) ? 1 : 0); @@ -532,15 +538,15 @@ write_try_string: int property_release (struct inode *inode, struct file *filp) { - openprom_property *op = (openprom_property *)filp->private_data; + openprom_property *op = filp->private_data; int error; u32 node; if (!op) return 0; lock_kernel(); - node = nodes[(u16)((long)inode->u.generic_ip)].node; - if ((u16)((long)inode->u.generic_ip) == aliases) { + node = nodes[ptr_nod(inode->u.generic_ip)].node; + if (ptr_nod(inode->u.generic_ip) == aliases) { if ((op->flag & OPP_DIRTY) && (op->flag & OPP_STRING)) { char *p = op->name; int i = (op->value - op->name) - strlen (op->name) - 1; @@ -931,7 +937,7 @@ static int __init check_space (u16 n) return -1; if (nodes) { - memcpy ((char *)pages, (char *)nodes, + memcpy ((char *)pages, nodes, (1 << alloced) * PAGE_SIZE); free_pages ((unsigned long)nodes, alloced); } @@ -1054,10 +1060,10 @@ out_no_root: return -ENOMEM; } -static struct super_block *openprom_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int openprom_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, openprom_fill_super); + return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); } static struct file_system_type openprom_fs_type = { diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7ef1f094de91..2ef313a96b66 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -329,6 +329,7 @@ void delete_partition(struct gendisk *disk, int part) p->ios[0] = p->ios[1] = 0; p->sectors[0] = p->sectors[1] = 0; devfs_remove("%s/part%d", disk->devfs_name, part); + sysfs_remove_link(&p->kobj, "subsystem"); if (p->holder_dir) kobject_unregister(p->holder_dir); kobject_uevent(&p->kobj, KOBJ_REMOVE); @@ -363,6 +364,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) kobject_add(&p->kobj); if (!disk->part_uevent_suppress) kobject_uevent(&p->kobj, KOBJ_ADD); + sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem"); partition_sysfs_add_subdir(p); disk->part[part-1] = p; } @@ -398,6 +400,7 @@ static void disk_sysfs_symlinks(struct gendisk *disk) kfree(disk_name); } } + sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj, "subsystem"); } /* Not exported, helper to add_disk(). */ @@ -481,6 +484,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) sector_t from = state->parts[p].from; if (!size) continue; + if (from + size > get_capacity(disk)) { + printk(" %s: p%d exceeds device capacity\n", + disk->disk_name, p); + } add_partition(disk, p, from, size); #ifdef CONFIG_BLK_DEV_MD if (state->parts[p].flags) @@ -496,8 +503,8 @@ unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) struct address_space *mapping = bdev->bd_inode->i_mapping; struct page *page; - page = read_cache_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), + NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); if (!PageUptodate(page)) @@ -548,5 +555,6 @@ void del_gendisk(struct gendisk *disk) put_device(disk->driverfs_dev); disk->driverfs_dev = NULL; } + sysfs_remove_link(&disk->kobj, "subsystem"); kobject_del(&disk->kobj); } diff --git a/fs/pipe.c b/fs/pipe.c index 5acd8954aaa0..20352573e025 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -979,12 +979,11 @@ no_files: * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. */ - -static struct super_block * -pipefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int pipefs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); + return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); } static struct file_system_type pipe_fs_type = { diff --git a/fs/proc/root.c b/fs/proc/root.c index c3fd3611112f..9995356ce73e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -26,10 +26,10 @@ struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc struct proc_dir_entry *proc_sys_root; #endif -static struct super_block *proc_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int proc_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, proc_fill_super); + return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); } static struct file_system_type proc_fs_type = { diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2ecd46f85e9f..2f24c46f72a1 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -128,7 +128,7 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_destroy_inode(struct inode *inode); static void qnx4_read_inode(struct inode *); static int qnx4_remount(struct super_block *sb, int *flags, char *data); -static int qnx4_statfs(struct super_block *, struct kstatfs *); +static int qnx4_statfs(struct dentry *, struct kstatfs *); static struct super_operations qnx4_sops = { @@ -282,8 +282,10 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock ) return block; } -static int qnx4_statfs(struct super_block *sb, struct kstatfs *buf) +static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; + lock_kernel(); buf->f_type = sb->s_magic; @@ -561,10 +563,11 @@ static void destroy_inodecache(void) "qnx4_inode_cache: not all structures were freed\n"); } -static struct super_block *qnx4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int qnx4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, + mnt); } static struct file_system_type qnx4_fs_type = { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 14bd2246fb6d..b9677335cc8d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -185,16 +185,17 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } -struct super_block *ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +int ramfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, ramfs_fill_super); + return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); } -static struct super_block *rootfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int rootfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); + return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, + mnt); } static struct file_system_type ramfs_fs_type = { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index cae2abbc0c71..00f1321e9209 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -60,7 +60,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) } static int reiserfs_remount(struct super_block *s, int *flags, char *data); -static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf); +static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); static int reiserfs_sync_fs(struct super_block *s, int wait) { @@ -1938,15 +1938,15 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return errval; } -static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf) +static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb); buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize)); buf->f_bfree = sb_free_blocks(rs); buf->f_bavail = buf->f_bfree; buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; - buf->f_bsize = s->s_blocksize; + buf->f_bsize = dentry->d_sb->s_blocksize; /* changed to accommodate gcc folks. */ buf->f_type = REISERFS_SUPER_MAGIC; return 0; @@ -2249,11 +2249,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, #endif -static struct super_block *get_super_block(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int get_super_block(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, + mnt); } static int __init init_reiserfs_fs(void) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index ffb79c48c5bf..39fedaa88a0c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -452,8 +452,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n) /* We can deadlock if we try to free dentries, and an unlink/rmdir has just occured - GFP_NOFS avoids this */ mapping_set_gfp_mask(mapping, GFP_NOFS); - page = read_cache_page(mapping, n, - (filler_t *) mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); kmap(page); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 9b9eda7b335c..283fbc6b8eea 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -179,12 +179,12 @@ outnobh: /* That's simple too. */ static int -romfs_statfs(struct super_block *sb, struct kstatfs *buf) +romfs_statfs(struct dentry *dentry, struct kstatfs *buf) { buf->f_type = ROMFS_MAGIC; buf->f_bsize = ROMBSIZE; buf->f_bfree = buf->f_bavail = buf->f_ffree; - buf->f_blocks = (romfs_maxsize(sb)+ROMBSIZE-1)>>ROMBSBITS; + buf->f_blocks = (romfs_maxsize(dentry->d_sb)+ROMBSIZE-1)>>ROMBSBITS; buf->f_namelen = ROMFS_MAXFN; return 0; } @@ -607,10 +607,11 @@ static struct super_operations romfs_ops = { .remount_fs = romfs_remount, }; -static struct super_block *romfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int romfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super, + mnt); } static struct file_system_type romfs_fs_type = { diff --git a/fs/select.c b/fs/select.c index a8109baa5e46..33b72ba0f86f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -546,37 +546,38 @@ struct poll_list { #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) -static void do_pollfd(unsigned int num, struct pollfd * fdpage, - poll_table ** pwait, int *count) +/* + * Fish for pollable events on the pollfd->fd file descriptor. We're only + * interested in events matching the pollfd->events mask, and the result + * matching that mask is both recorded in pollfd->revents and returned. The + * pwait poll_table will be used by the fd-provided poll handler for waiting, + * if non-NULL. + */ +static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) { - int i; - - for (i = 0; i < num; i++) { - int fd; - unsigned int mask; - struct pollfd *fdp; - - mask = 0; - fdp = fdpage+i; - fd = fdp->fd; - if (fd >= 0) { - int fput_needed; - struct file * file = fget_light(fd, &fput_needed); - mask = POLLNVAL; - if (file != NULL) { - mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) - mask = file->f_op->poll(file, *pwait); - mask &= fdp->events | POLLERR | POLLHUP; - fput_light(file, fput_needed); - } - if (mask) { - *pwait = NULL; - (*count)++; - } + unsigned int mask; + int fd; + + mask = 0; + fd = pollfd->fd; + if (fd >= 0) { + int fput_needed; + struct file * file; + + file = fget_light(fd, &fput_needed); + mask = POLLNVAL; + if (file != NULL) { + mask = DEFAULT_POLLMASK; + if (file->f_op && file->f_op->poll) + mask = file->f_op->poll(file, pwait); + /* Mask out unneeded events. */ + mask &= pollfd->events | POLLERR | POLLHUP; + fput_light(file, fput_needed); } - fdp->revents = mask; } + pollfd->revents = mask; + + return mask; } static int do_poll(unsigned int nfds, struct poll_list *list, @@ -594,11 +595,29 @@ static int do_poll(unsigned int nfds, struct poll_list *list, long __timeout; set_current_state(TASK_INTERRUPTIBLE); - walk = list; - while(walk != NULL) { - do_pollfd( walk->len, walk->entries, &pt, &count); - walk = walk->next; + for (walk = list; walk != NULL; walk = walk->next) { + struct pollfd * pfd, * pfd_end; + + pfd = walk->entries; + pfd_end = pfd + walk->len; + for (; pfd != pfd_end; pfd++) { + /* + * Fish for events. If we found one, record it + * and kill the poll_table, so we don't + * needlessly register any other waiters after + * this. They'll get immediately deregistered + * when we break out and return. + */ + if (do_pollfd(pfd, pt)) { + count++; + pt = NULL; + } + } } + /* + * All waiters have already been registered, so don't provide + * a poll_table to them on the next loop iteration. + */ pt = NULL; if (count || !*timeout || signal_pending(current)) break; @@ -727,9 +746,9 @@ out_fds: asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, long timeout_msecs) { - s64 timeout_jiffies = 0; + s64 timeout_jiffies; - if (timeout_msecs) { + if (timeout_msecs > 0) { #if HZ > 1000 /* We can only overflow if HZ > 1000 */ if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) @@ -737,6 +756,9 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, else #endif timeout_jiffies = msecs_to_jiffies(timeout_msecs); + } else { + /* Infinite (< 0) or no (0) timeout */ + timeout_jiffies = timeout_msecs; } return do_sys_poll(ufds, nfds, &timeout_jiffies); diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fdeabc0a34f7..506ff87c1d4b 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -48,7 +48,7 @@ static void smb_delete_inode(struct inode *); static void smb_put_super(struct super_block *); -static int smb_statfs(struct super_block *, struct kstatfs *); +static int smb_statfs(struct dentry *, struct kstatfs *); static int smb_show_options(struct seq_file *, struct vfsmount *); static kmem_cache_t *smb_inode_cachep; @@ -641,13 +641,13 @@ out_no_server: } static int -smb_statfs(struct super_block *sb, struct kstatfs *buf) +smb_statfs(struct dentry *dentry, struct kstatfs *buf) { int result; lock_kernel(); - result = smb_proc_dskattr(sb, buf); + result = smb_proc_dskattr(dentry, buf); unlock_kernel(); @@ -782,10 +782,10 @@ out: return error; } -static struct super_block *smb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int smb_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, smb_fill_super); + return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt); } static struct file_system_type smb_fs_type = { diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index b1b878b81730..c3495059889d 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -3226,9 +3226,9 @@ smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr) } int -smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr) +smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr) { - struct smb_sb_info *server = SMB_SB(sb); + struct smb_sb_info *server = SMB_SB(dentry->d_sb); int result; char *p; long unit; diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index 47664597e6b1..972ed7dad388 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h @@ -29,7 +29,7 @@ extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr); extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr); extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor); extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr); -extern int smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr); +extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr); extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len); extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath); extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry); diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 481a97a423fa..3f71384020cb 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -20,6 +20,7 @@ #include <linux/smp_lock.h> #include <linux/module.h> #include <linux/net.h> +#include <linux/kthread.h> #include <net/ip.h> #include <linux/smb_fs.h> @@ -40,7 +41,7 @@ enum smbiod_state { }; static enum smbiod_state smbiod_state = SMBIOD_DEAD; -static pid_t smbiod_pid; +static struct task_struct *smbiod_thread; static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait); static LIST_HEAD(smb_servers); static DEFINE_SPINLOCK(servers_lock); @@ -67,20 +68,29 @@ void smbiod_wake_up(void) */ static int smbiod_start(void) { - pid_t pid; + struct task_struct *tsk; + int err = 0; + if (smbiod_state != SMBIOD_DEAD) return 0; smbiod_state = SMBIOD_STARTING; __module_get(THIS_MODULE); spin_unlock(&servers_lock); - pid = kernel_thread(smbiod, NULL, 0); - if (pid < 0) + tsk = kthread_run(smbiod, NULL, "smbiod"); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); module_put(THIS_MODULE); + } spin_lock(&servers_lock); - smbiod_state = pid < 0 ? SMBIOD_DEAD : SMBIOD_RUNNING; - smbiod_pid = pid; - return pid; + if (err < 0) { + smbiod_state = SMBIOD_DEAD; + smbiod_thread = NULL; + } else { + smbiod_state = SMBIOD_RUNNING; + smbiod_thread = tsk; + } + return err; } /* @@ -290,8 +300,6 @@ out: */ static int smbiod(void *unused) { - daemonize("smbiod"); - allow_signal(SIGKILL); VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid); diff --git a/fs/splice.c b/fs/splice.c index a285fd746dc0..05fd2787be98 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -55,31 +55,43 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; - struct address_space *mapping = page_mapping(page); + struct address_space *mapping; lock_page(page); - WARN_ON(!PageUptodate(page)); + mapping = page_mapping(page); + if (mapping) { + WARN_ON(!PageUptodate(page)); - /* - * At least for ext2 with nobh option, we need to wait on writeback - * completing on this page, since we'll remove it from the pagecache. - * Otherwise truncate wont wait on the page, allowing the disk - * blocks to be reused by someone else before we actually wrote our - * data to them. fs corruption ensues. - */ - wait_on_page_writeback(page); + /* + * At least for ext2 with nobh option, we need to wait on + * writeback completing on this page, since we'll remove it + * from the pagecache. Otherwise truncate wont wait on the + * page, allowing the disk blocks to be reused by someone else + * before we actually wrote our data to them. fs corruption + * ensues. + */ + wait_on_page_writeback(page); - if (PagePrivate(page)) - try_to_release_page(page, mapping_gfp_mask(mapping)); + if (PagePrivate(page)) + try_to_release_page(page, mapping_gfp_mask(mapping)); - if (!remove_mapping(mapping, page)) { - unlock_page(page); - return 1; + /* + * If we succeeded in removing the mapping, set LRU flag + * and return good. + */ + if (remove_mapping(mapping, page)) { + buf->flags |= PIPE_BUF_FLAG_LRU; + return 0; + } } - buf->flags |= PIPE_BUF_FLAG_LRU; - return 0; + /* + * Raced with truncate or failed to remove page from current + * address space, unlock and return failure. + */ + unlock_page(page); + return 1; } static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, diff --git a/fs/super.c b/fs/super.c index a66f66bb8049..8a669f6f3f52 100644 --- a/fs/super.c +++ b/fs/super.c @@ -231,7 +231,7 @@ void generic_shutdown_super(struct super_block *sb) if (root) { sb->s_root = NULL; shrink_dcache_parent(root); - shrink_dcache_anon(&sb->s_anon); + shrink_dcache_sb(sb); dput(root); fsync_super(sb); lock_super(sb); @@ -486,7 +486,7 @@ asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf) s = user_get_super(new_decode_dev(dev)); if (s == NULL) goto out; - err = vfs_statfs(s, &sbuf); + err = vfs_statfs(s->s_root, &sbuf); drop_super(s); if (err) goto out; @@ -676,9 +676,10 @@ static void bdev_uevent(struct block_device *bdev, enum kobject_action action) } } -struct super_block *get_sb_bdev(struct file_system_type *fs_type, +int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) { struct block_device *bdev; struct super_block *s; @@ -686,7 +687,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, bdev = open_bdev_excl(dev_name, flags, fs_type); if (IS_ERR(bdev)) - return (struct super_block *)bdev; + return PTR_ERR(bdev); /* * once the super is inserted into the list by sget, s_umount @@ -697,15 +698,17 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); mutex_unlock(&bdev->bd_mount_mutex); if (IS_ERR(s)) - goto out; + goto error_s; if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { up_write(&s->s_umount); deactivate_super(s); - s = ERR_PTR(-EBUSY); + error = -EBUSY; + goto error_bdev; } - goto out; + + close_bdev_excl(bdev); } else { char b[BDEVNAME_SIZE]; @@ -716,18 +719,21 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - s = ERR_PTR(error); - } else { - s->s_flags |= MS_ACTIVE; - bdev_uevent(bdev, KOBJ_MOUNT); + goto error; } + + s->s_flags |= MS_ACTIVE; + bdev_uevent(bdev, KOBJ_MOUNT); } - return s; + return simple_set_mnt(mnt, s); -out: +error_s: + error = PTR_ERR(s); +error_bdev: close_bdev_excl(bdev); - return s; +error: + return error; } EXPORT_SYMBOL(get_sb_bdev); @@ -744,15 +750,16 @@ void kill_block_super(struct super_block *sb) EXPORT_SYMBOL(kill_block_super); -struct super_block *get_sb_nodev(struct file_system_type *fs_type, +int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); if (IS_ERR(s)) - return s; + return PTR_ERR(s); s->s_flags = flags; @@ -760,10 +767,10 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); } EXPORT_SYMBOL(get_sb_nodev); @@ -773,94 +780,100 @@ static int compare_single(struct super_block *s, void *p) return 1; } -struct super_block *get_sb_single(struct file_system_type *fs_type, +int get_sb_single(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) { struct super_block *s; int error; s = sget(fs_type, compare_single, set_anon_super, NULL); if (IS_ERR(s)) - return s; + return PTR_ERR(s); if (!s->s_root) { s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; } do_remount_sb(s, flags, data, 0); - return s; + return simple_set_mnt(mnt, s); } EXPORT_SYMBOL(get_sb_single); struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { - struct file_system_type *type = get_fs_type(fstype); - struct super_block *sb = ERR_PTR(-ENOMEM); struct vfsmount *mnt; - int error; char *secdata = NULL; + int error; if (!type) return ERR_PTR(-ENODEV); + error = -ENOMEM; mnt = alloc_vfsmnt(name); if (!mnt) goto out; if (data) { secdata = alloc_secdata(); - if (!secdata) { - sb = ERR_PTR(-ENOMEM); + if (!secdata) goto out_mnt; - } error = security_sb_copy_data(type, data, secdata); - if (error) { - sb = ERR_PTR(error); + if (error) goto out_free_secdata; - } } - sb = type->get_sb(type, flags, name, data); - if (IS_ERR(sb)) + error = type->get_sb(type, flags, name, data, mnt); + if (error < 0) goto out_free_secdata; - error = security_sb_kern_mount(sb, secdata); + + error = security_sb_kern_mount(mnt->mnt_sb, secdata); if (error) goto out_sb; - mnt->mnt_sb = sb; - mnt->mnt_root = dget(sb->s_root); - mnt->mnt_mountpoint = sb->s_root; + + mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; - up_write(&sb->s_umount); + up_write(&mnt->mnt_sb->s_umount); free_secdata(secdata); - put_filesystem(type); return mnt; out_sb: - up_write(&sb->s_umount); - deactivate_super(sb); - sb = ERR_PTR(error); + dput(mnt->mnt_root); + up_write(&mnt->mnt_sb->s_umount); + deactivate_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: free_vfsmnt(mnt); out: - put_filesystem(type); - return (struct vfsmount *)sb; + return ERR_PTR(error); } -EXPORT_SYMBOL_GPL(do_kern_mount); +EXPORT_SYMBOL_GPL(vfs_kern_mount); + +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); + put_filesystem(type); + return mnt; +} struct vfsmount *kern_mount(struct file_system_type *type) { - return do_kern_mount(type->name, 0, type->name, NULL); + return vfs_kern_mount(type, 0, type->name, NULL); } EXPORT_SYMBOL(kern_mount); diff --git a/fs/sync.c b/fs/sync.c index aab5ffe77e9f..955aef04da28 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -100,7 +100,7 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, } if (nbytes == 0) - endbyte = -1; + endbyte = LLONG_MAX; else endbyte--; /* inclusive */ diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index f1117e885bd6..40190c489271 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -66,10 +66,10 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *sysfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, sysfs_fill_super); + return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt); } static struct file_system_type sysfs_fs_type = { diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index d7074341ee87..f2bef962d309 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -53,8 +53,7 @@ static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) static struct page * dir_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_cache_page(mapping, n, - (filler_t*)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); kmap(page); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3ff89cc5833a..58b2d22142ba 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -85,8 +85,9 @@ static void sysv_put_super(struct super_block *sb) kfree(sbi); } -static int sysv_statfs(struct super_block *sb, struct kstatfs *buf) +static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; struct sysv_sb_info *sbi = SYSV_SB(sb); buf->f_type = sb->s_magic; diff --git a/fs/sysv/super.c b/fs/sysv/super.c index e92b991e6dda..876639b93321 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -506,16 +506,17 @@ failed: /* Every kernel module contains stuff like this. */ -static struct super_block *sysv_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysv_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, + mnt); } -static struct super_block *v7_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int v7_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); } static struct file_system_type sysv_fs_type = { diff --git a/fs/udf/super.c b/fs/udf/super.c index e45789fe38e8..44fe2cb0bbb2 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -91,13 +91,13 @@ static void udf_load_partdesc(struct super_block *, struct buffer_head *); static void udf_open_lvid(struct super_block *); static void udf_close_lvid(struct super_block *); static unsigned int udf_count_free(struct super_block *); -static int udf_statfs(struct super_block *, struct kstatfs *); +static int udf_statfs(struct dentry *, struct kstatfs *); /* UDF filesystem type */ -static struct super_block *udf_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int udf_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); } static struct file_system_type udf_fstype = { @@ -1779,8 +1779,10 @@ udf_put_super(struct super_block *sb) * Written, tested, and released. */ static int -udf_statfs(struct super_block *sb, struct kstatfs *buf) +udf_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; + buf->f_type = UDF_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb)); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 3ada9dcf55b8..95b878e5c7a0 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -21,14 +21,6 @@ #include "swab.h" #include "util.h" -#undef UFS_BALLOC_DEBUG - -#ifdef UFS_BALLOC_DEBUG -#define UFSD(x) printk("(%s, %d), %s:", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - static unsigned ufs_add_fragments (struct inode *, unsigned, unsigned, unsigned, int *); static unsigned ufs_alloc_fragments (struct inode *, unsigned, unsigned, unsigned, int *); static unsigned ufs_alloccg_block (struct inode *, struct ufs_cg_private_info *, unsigned, int *); @@ -39,7 +31,8 @@ static void ufs_clusteracct(struct super_block *, struct ufs_cg_private_info *, /* * Free 'count' fragments from fragment number 'fragment' */ -void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count) { +void ufs_free_fragments(struct inode *inode, unsigned fragment, unsigned count) +{ struct super_block * sb; struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; @@ -51,7 +44,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); - UFSD(("ENTER, fragment %u, count %u\n", fragment, count)) + UFSD("ENTER, fragment %u, count %u\n", fragment, count); if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); @@ -68,7 +61,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count ucpi = ufs_load_cylinder (sb, cgno); if (!ucpi) goto failed; - ucg = ubh_get_ucg (UCPI_UBH); + ucg = ubh_get_ucg (UCPI_UBH(ucpi)); if (!ufs_cg_chkmagic(sb, ucg)) { ufs_panic (sb, "ufs_free_fragments", "internal error, bad magic number on cg %u", cgno); goto failed; @@ -76,11 +69,11 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count end_bit = bit + count; bbase = ufs_blknum (bit); - blkmap = ubh_blkmap (UCPI_UBH, ucpi->c_freeoff, bbase); + blkmap = ubh_blkmap (UCPI_UBH(ucpi), ucpi->c_freeoff, bbase); ufs_fragacct (sb, blkmap, ucg->cg_frsum, -1); for (i = bit; i < end_bit; i++) { - if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, i)) - ubh_setbit (UCPI_UBH, ucpi->c_freeoff, i); + if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, i)) + ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, i); else ufs_error (sb, "ufs_free_fragments", "bit already cleared for fragment %u", i); @@ -90,51 +83,52 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count fs32_add(sb, &ucg->cg_cs.cs_nffree, count); - fs32_add(sb, &usb1->fs_cstotal.cs_nffree, count); + uspi->cs_total.cs_nffree += count; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); - blkmap = ubh_blkmap (UCPI_UBH, ucpi->c_freeoff, bbase); + blkmap = ubh_blkmap (UCPI_UBH(ucpi), ucpi->c_freeoff, bbase); ufs_fragacct(sb, blkmap, ucg->cg_frsum, 1); /* * Trying to reassemble free fragments into block */ blkno = ufs_fragstoblks (bbase); - if (ubh_isblockset(UCPI_UBH, ucpi->c_freeoff, blkno)) { + if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno)) { fs32_sub(sb, &ucg->cg_cs.cs_nffree, uspi->s_fpb); - fs32_sub(sb, &usb1->fs_cstotal.cs_nffree, uspi->s_fpb); + uspi->cs_total.cs_nffree -= uspi->s_fpb; fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, uspi->s_fpb); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); - fs32_add(sb, &usb1->fs_cstotal.cs_nbfree, 1); + uspi->cs_total.cs_nbfree++; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1); cylno = ufs_cbtocylno (bbase); fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(bbase)), 1); fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1); } - ubh_mark_buffer_dirty (USPI_UBH); - ubh_mark_buffer_dirty (UCPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); - ubh_wait_on_buffer (UCPI_UBH); + ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); + ubh_wait_on_buffer (UCPI_UBH(ucpi)); } sb->s_dirt = 1; unlock_super (sb); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return; failed: unlock_super (sb); - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return; } /* * Free 'count' fragments from fragment number 'fragment' (free whole blocks) */ -void ufs_free_blocks (struct inode * inode, unsigned fragment, unsigned count) { +void ufs_free_blocks(struct inode *inode, unsigned fragment, unsigned count) +{ struct super_block * sb; struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; @@ -146,7 +140,7 @@ void ufs_free_blocks (struct inode * inode, unsigned fragment, unsigned count) { uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); - UFSD(("ENTER, fragment %u, count %u\n", fragment, count)) + UFSD("ENTER, fragment %u, count %u\n", fragment, count); if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) { ufs_error (sb, "ufs_free_blocks", "internal error, " @@ -162,7 +156,7 @@ do_more: bit = ufs_dtogd (fragment); if (cgno >= uspi->s_ncg) { ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device"); - goto failed; + goto failed_unlock; } end_bit = bit + count; if (end_bit > uspi->s_fpg) { @@ -173,36 +167,36 @@ do_more: ucpi = ufs_load_cylinder (sb, cgno); if (!ucpi) - goto failed; - ucg = ubh_get_ucg (UCPI_UBH); + goto failed_unlock; + ucg = ubh_get_ucg (UCPI_UBH(ucpi)); if (!ufs_cg_chkmagic(sb, ucg)) { ufs_panic (sb, "ufs_free_blocks", "internal error, bad magic number on cg %u", cgno); - goto failed; + goto failed_unlock; } for (i = bit; i < end_bit; i += uspi->s_fpb) { blkno = ufs_fragstoblks(i); - if (ubh_isblockset(UCPI_UBH, ucpi->c_freeoff, blkno)) { + if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno)) { ufs_error(sb, "ufs_free_blocks", "freeing free fragment"); } - ubh_setblock(UCPI_UBH, ucpi->c_freeoff, blkno); + ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); DQUOT_FREE_BLOCK(inode, uspi->s_fpb); fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); - fs32_add(sb, &usb1->fs_cstotal.cs_nbfree, 1); + uspi->cs_total.cs_nbfree++; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1); cylno = ufs_cbtocylno(i); fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(i)), 1); fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1); } - ubh_mark_buffer_dirty (USPI_UBH); - ubh_mark_buffer_dirty (UCPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); - ubh_wait_on_buffer (UCPI_UBH); + ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); + ubh_wait_on_buffer (UCPI_UBH(ucpi)); } if (overflow) { @@ -213,38 +207,127 @@ do_more: sb->s_dirt = 1; unlock_super (sb); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return; -failed: +failed_unlock: unlock_super (sb); - UFSD(("EXIT (FAILED)\n")) +failed: + UFSD("EXIT (FAILED)\n"); return; } +static struct page *ufs_get_locked_page(struct address_space *mapping, + unsigned long index) +{ + struct page *page; + +try_again: + page = find_lock_page(mapping, index); + if (!page) { + page = read_cache_page(mapping, index, + (filler_t*)mapping->a_ops->readpage, + NULL); + if (IS_ERR(page)) { + printk(KERN_ERR "ufs_change_blocknr: " + "read_cache_page error: ino %lu, index: %lu\n", + mapping->host->i_ino, index); + goto out; + } + lock_page(page); -#define NULLIFY_FRAGMENTS \ - for (i = oldcount; i < newcount; i++) { \ - bh = sb_getblk(sb, result + i); \ - memset (bh->b_data, 0, sb->s_blocksize); \ - set_buffer_uptodate(bh); \ - mark_buffer_dirty (bh); \ - if (IS_SYNC(inode)) \ - sync_dirty_buffer(bh); \ - brelse (bh); \ + if (!PageUptodate(page) || PageError(page)) { + unlock_page(page); + page_cache_release(page); + + printk(KERN_ERR "ufs_change_blocknr: " + "can not read page: ino %lu, index: %lu\n", + mapping->host->i_ino, index); + + page = ERR_PTR(-EIO); + goto out; + } } -unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, - unsigned goal, unsigned count, int * err ) + if (unlikely(!page->mapping || !page_has_buffers(page))) { + unlock_page(page); + page_cache_release(page); + goto try_again;/*we really need these buffers*/ + } +out: + return page; +} + +/* + * Modify inode page cache in such way: + * have - blocks with b_blocknr equal to oldb...oldb+count-1 + * get - blocks with b_blocknr equal to newb...newb+count-1 + * also we suppose that oldb...oldb+count-1 blocks + * situated at the end of file. + * + * We can come here from ufs_writepage or ufs_prepare_write, + * locked_page is argument of these functions, so we already lock it. + */ +static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, + unsigned int count, unsigned int oldb, + unsigned int newb, struct page *locked_page) +{ + unsigned int blk_per_page = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + struct address_space *mapping = inode->i_mapping; + pgoff_t index, cur_index = locked_page->index; + unsigned int i, j; + struct page *page; + struct buffer_head *head, *bh; + + UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n", + inode->i_ino, count, oldb, newb); + + BUG_ON(!PageLocked(locked_page)); + + for (i = 0; i < count; i += blk_per_page) { + index = (baseblk+i) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + + if (likely(cur_index != index)) { + page = ufs_get_locked_page(mapping, index); + if (IS_ERR(page)) + continue; + } else + page = locked_page; + + j = i; + head = page_buffers(page); + bh = head; + do { + if (likely(bh->b_blocknr == j + oldb && j < count)) { + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + bh->b_blocknr = newb + j++; + mark_buffer_dirty(bh); + } + + bh = bh->b_this_page; + } while (bh != head); + + set_page_dirty(page); + + if (likely(cur_index != index)) { + unlock_page(page); + page_cache_release(page); + } + } + UFSD("EXIT\n"); +} + +unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, + unsigned goal, unsigned count, int * err, struct page *locked_page) { struct super_block * sb; struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; - struct buffer_head * bh; - unsigned cgno, oldcount, newcount, tmp, request, i, result; + unsigned cgno, oldcount, newcount, tmp, request, result; - UFSD(("ENTER, ino %lu, fragment %u, goal %u, count %u\n", inode->i_ino, fragment, goal, count)) + UFSD("ENTER, ino %lu, fragment %u, goal %u, count %u\n", inode->i_ino, fragment, goal, count); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -273,14 +356,14 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, return (unsigned)-1; } if (fragment < UFS_I(inode)->i_lastfrag) { - UFSD(("EXIT (ALREADY ALLOCATED)\n")) + UFSD("EXIT (ALREADY ALLOCATED)\n"); unlock_super (sb); return 0; } } else { if (tmp) { - UFSD(("EXIT (ALREADY ALLOCATED)\n")) + UFSD("EXIT (ALREADY ALLOCATED)\n"); unlock_super(sb); return 0; } @@ -289,9 +372,9 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, /* * There is not enough space for user on the device */ - if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(usb1, UFS_MINFREE) <= 0) { + if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { unlock_super (sb); - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return 0; } @@ -310,12 +393,10 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, if (result) { *p = cpu_to_fs32(sb, result); *err = 0; - inode->i_blocks += count << uspi->s_nspfshift; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); - NULLIFY_FRAGMENTS } unlock_super(sb); - UFSD(("EXIT, result %u\n", result)) + UFSD("EXIT, result %u\n", result); return result; } @@ -325,11 +406,9 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, result = ufs_add_fragments (inode, tmp, oldcount, newcount, err); if (result) { *err = 0; - inode->i_blocks += count << uspi->s_nspfshift; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); - NULLIFY_FRAGMENTS unlock_super(sb); - UFSD(("EXIT, result %u\n", result)) + UFSD("EXIT, result %u\n", result); return result; } @@ -339,8 +418,8 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, switch (fs32_to_cpu(sb, usb1->fs_optim)) { case UFS_OPTSPACE: request = newcount; - if (uspi->s_minfree < 5 || fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree) - > uspi->s_dsize * uspi->s_minfree / (2 * 100) ) + if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree + > uspi->s_dsize * uspi->s_minfree / (2 * 100)) break; usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); break; @@ -349,7 +428,7 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, case UFS_OPTTIME: request = uspi->s_fpb; - if (fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree) < uspi->s_dsize * + if (uspi->cs_total.cs_nffree < uspi->s_dsize * (uspi->s_minfree - 2) / 100) break; usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); @@ -357,39 +436,22 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, } result = ufs_alloc_fragments (inode, cgno, goal, request, err); if (result) { - for (i = 0; i < oldcount; i++) { - bh = sb_bread(sb, tmp + i); - if(bh) - { - clear_buffer_dirty(bh); - bh->b_blocknr = result + i; - mark_buffer_dirty (bh); - if (IS_SYNC(inode)) - sync_dirty_buffer(bh); - brelse (bh); - } - else - { - printk(KERN_ERR "ufs_new_fragments: bread fail\n"); - unlock_super(sb); - return 0; - } - } + ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp, + result, locked_page); + *p = cpu_to_fs32(sb, result); *err = 0; - inode->i_blocks += count << uspi->s_nspfshift; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); - NULLIFY_FRAGMENTS unlock_super(sb); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); - UFSD(("EXIT, result %u\n", result)) + UFSD("EXIT, result %u\n", result); return result; } unlock_super(sb); - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return 0; } @@ -404,7 +466,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, struct ufs_cylinder_group * ucg; unsigned cgno, fragno, fragoff, count, fragsize, i; - UFSD(("ENTER, fragment %u, oldcount %u, newcount %u\n", fragment, oldcount, newcount)) + UFSD("ENTER, fragment %u, oldcount %u, newcount %u\n", fragment, oldcount, newcount); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -419,7 +481,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, ucpi = ufs_load_cylinder (sb, cgno); if (!ucpi) return 0; - ucg = ubh_get_ucg (UCPI_UBH); + ucg = ubh_get_ucg (UCPI_UBH(ucpi)); if (!ufs_cg_chkmagic(sb, ucg)) { ufs_panic (sb, "ufs_add_fragments", "internal error, bad magic number on cg %u", cgno); @@ -429,14 +491,14 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, fragno = ufs_dtogd (fragment); fragoff = ufs_fragnum (fragno); for (i = oldcount; i < newcount; i++) - if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, fragno + i)) + if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) return 0; /* * Block can be extended */ ucg->cg_time = cpu_to_fs32(sb, get_seconds()); for (i = newcount; i < (uspi->s_fpb - fragoff); i++) - if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, fragno + i)) + if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) break; fragsize = i - oldcount; if (!fs32_to_cpu(sb, ucg->cg_frsum[fragsize])) @@ -446,7 +508,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, if (fragsize != count) fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); for (i = oldcount; i < newcount; i++) - ubh_clrbit (UCPI_UBH, ucpi->c_freeoff, fragno + i); + ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); if(DQUOT_ALLOC_BLOCK(inode, count)) { *err = -EDQUOT; return 0; @@ -454,17 +516,17 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); - fs32_sub(sb, &usb1->fs_cstotal.cs_nffree, count); + uspi->cs_total.cs_nffree -= count; - ubh_mark_buffer_dirty (USPI_UBH); - ubh_mark_buffer_dirty (UCPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); - ubh_wait_on_buffer (UCPI_UBH); + ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); + ubh_wait_on_buffer (UCPI_UBH(ucpi)); } sb->s_dirt = 1; - UFSD(("EXIT, fragment %u\n", fragment)) + UFSD("EXIT, fragment %u\n", fragment); return fragment; } @@ -487,7 +549,7 @@ static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno, struct ufs_cylinder_group * ucg; unsigned oldcg, i, j, k, result, allocsize; - UFSD(("ENTER, ino %lu, cgno %u, goal %u, count %u\n", inode->i_ino, cgno, goal, count)) + UFSD("ENTER, ino %lu, cgno %u, goal %u, count %u\n", inode->i_ino, cgno, goal, count); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -521,14 +583,14 @@ static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno, UFS_TEST_FREE_SPACE_CG } - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return 0; cg_found: ucpi = ufs_load_cylinder (sb, cgno); if (!ucpi) return 0; - ucg = ubh_get_ucg (UCPI_UBH); + ucg = ubh_get_ucg (UCPI_UBH(ucpi)); if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_alloc_fragments", "internal error, bad magic number on cg %u", cgno); @@ -551,12 +613,12 @@ cg_found: return 0; goal = ufs_dtogd (result); for (i = count; i < uspi->s_fpb; i++) - ubh_setbit (UCPI_UBH, ucpi->c_freeoff, goal + i); + ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; DQUOT_FREE_BLOCK(inode, i); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); - fs32_add(sb, &usb1->fs_cstotal.cs_nffree, i); + uspi->cs_total.cs_nffree += i; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i); fs32_add(sb, &ucg->cg_frsum[i], 1); goto succed; @@ -570,10 +632,10 @@ cg_found: return 0; } for (i = 0; i < count; i++) - ubh_clrbit (UCPI_UBH, ucpi->c_freeoff, result + i); + ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); - fs32_sub(sb, &usb1->fs_cstotal.cs_nffree, count); + uspi->cs_total.cs_nffree -= count; fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); fs32_sub(sb, &ucg->cg_frsum[allocsize], 1); @@ -581,16 +643,16 @@ cg_found: fs32_add(sb, &ucg->cg_frsum[allocsize - count], 1); succed: - ubh_mark_buffer_dirty (USPI_UBH); - ubh_mark_buffer_dirty (UCPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); - ubh_wait_on_buffer (UCPI_UBH); + ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); + ubh_wait_on_buffer (UCPI_UBH(ucpi)); } sb->s_dirt = 1; result += cgno * uspi->s_fpg; - UFSD(("EXIT3, result %u\n", result)) + UFSD("EXIT3, result %u\n", result); return result; } @@ -603,12 +665,12 @@ static unsigned ufs_alloccg_block (struct inode * inode, struct ufs_cylinder_group * ucg; unsigned result, cylno, blkno; - UFSD(("ENTER, goal %u\n", goal)) + UFSD("ENTER, goal %u\n", goal); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); - ucg = ubh_get_ucg(UCPI_UBH); + ucg = ubh_get_ucg(UCPI_UBH(ucpi)); if (goal == 0) { goal = ucpi->c_rotor; @@ -620,7 +682,7 @@ static unsigned ufs_alloccg_block (struct inode * inode, /* * If the requested block is available, use it. */ - if (ubh_isblockset(UCPI_UBH, ucpi->c_freeoff, ufs_fragstoblks(goal))) { + if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, ufs_fragstoblks(goal))) { result = goal; goto gotit; } @@ -632,7 +694,7 @@ norot: ucpi->c_rotor = result; gotit: blkno = ufs_fragstoblks(result); - ubh_clrblock (UCPI_UBH, ucpi->c_freeoff, blkno); + ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, -1); if(DQUOT_ALLOC_BLOCK(inode, uspi->s_fpb)) { @@ -641,31 +703,76 @@ gotit: } fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1); - fs32_sub(sb, &usb1->fs_cstotal.cs_nbfree, 1); + uspi->cs_total.cs_nbfree--; fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1); cylno = ufs_cbtocylno(result); fs16_sub(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(result)), 1); fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1); - UFSD(("EXIT, result %u\n", result)) + UFSD("EXIT, result %u\n", result); return result; } -static unsigned ufs_bitmap_search (struct super_block * sb, - struct ufs_cg_private_info * ucpi, unsigned goal, unsigned count) +static unsigned ubh_scanc(struct ufs_sb_private_info *uspi, + struct ufs_buffer_head *ubh, + unsigned begin, unsigned size, + unsigned char *table, unsigned char mask) { - struct ufs_sb_private_info * uspi; - struct ufs_super_block_first * usb1; - struct ufs_cylinder_group * ucg; - unsigned start, length, location, result; - unsigned possition, fragsize, blockmap, mask; - - UFSD(("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count)) + unsigned rest, offset; + unsigned char *cp; + + + offset = begin & ~uspi->s_fmask; + begin >>= uspi->s_fshift; + for (;;) { + if ((offset + size) < uspi->s_fsize) + rest = size; + else + rest = uspi->s_fsize - offset; + size -= rest; + cp = ubh->bh[begin]->b_data + offset; + while ((table[*cp++] & mask) == 0 && --rest) + ; + if (rest || !size) + break; + begin++; + offset = 0; + } + return (size + rest); +} + +/* + * Find a block of the specified size in the specified cylinder group. + * @sp: pointer to super block + * @ucpi: pointer to cylinder group info + * @goal: near which block we want find new one + * @count: specified size + */ +static unsigned ufs_bitmap_search(struct super_block *sb, + struct ufs_cg_private_info *ucpi, + unsigned goal, unsigned count) +{ + /* + * Bit patterns for identifying fragments in the block map + * used as ((map & mask_arr) == want_arr) + */ + static const int mask_arr[9] = { + 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff + }; + static const int want_arr[9] = { + 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe + }; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + struct ufs_super_block_first *usb1; + struct ufs_cylinder_group *ucg; + unsigned start, length, loc, result; + unsigned pos, want, blockmap, mask, end; + + UFSD("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count); - uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first (uspi); - ucg = ubh_get_ucg(UCPI_UBH); + ucg = ubh_get_ucg(UCPI_UBH(ucpi)); if (goal) start = ufs_dtogd(goal) >> 3; @@ -673,53 +780,50 @@ static unsigned ufs_bitmap_search (struct super_block * sb, start = ucpi->c_frotor >> 3; length = ((uspi->s_fpg + 7) >> 3) - start; - location = ubh_scanc(UCPI_UBH, ucpi->c_freeoff + start, length, + loc = ubh_scanc(uspi, UCPI_UBH(ucpi), ucpi->c_freeoff + start, length, (uspi->s_fpb == 8) ? ufs_fragtable_8fpb : ufs_fragtable_other, 1 << (count - 1 + (uspi->s_fpb & 7))); - if (location == 0) { + if (loc == 0) { length = start + 1; - location = ubh_scanc(UCPI_UBH, ucpi->c_freeoff, length, - (uspi->s_fpb == 8) ? ufs_fragtable_8fpb : ufs_fragtable_other, - 1 << (count - 1 + (uspi->s_fpb & 7))); - if (location == 0) { - ufs_error (sb, "ufs_bitmap_search", - "bitmap corrupted on cg %u, start %u, length %u, count %u, freeoff %u\n", - ucpi->c_cgx, start, length, count, ucpi->c_freeoff); + loc = ubh_scanc(uspi, UCPI_UBH(ucpi), ucpi->c_freeoff, length, + (uspi->s_fpb == 8) ? ufs_fragtable_8fpb : + ufs_fragtable_other, + 1 << (count - 1 + (uspi->s_fpb & 7))); + if (loc == 0) { + ufs_error(sb, "ufs_bitmap_search", + "bitmap corrupted on cg %u, start %u," + " length %u, count %u, freeoff %u\n", + ucpi->c_cgx, start, length, count, + ucpi->c_freeoff); return (unsigned)-1; } start = 0; } - result = (start + length - location) << 3; + result = (start + length - loc) << 3; ucpi->c_frotor = result; /* * found the byte in the map */ - blockmap = ubh_blkmap(UCPI_UBH, ucpi->c_freeoff, result); - fragsize = 0; - for (possition = 0, mask = 1; possition < 8; possition++, mask <<= 1) { - if (blockmap & mask) { - if (!(possition & uspi->s_fpbmask)) - fragsize = 1; - else - fragsize++; - } - else { - if (fragsize == count) { - result += possition - count; - UFSD(("EXIT, result %u\n", result)) - return result; - } - fragsize = 0; - } - } - if (fragsize == count) { - result += possition - count; - UFSD(("EXIT, result %u\n", result)) - return result; - } - ufs_error (sb, "ufs_bitmap_search", "block not in map on cg %u\n", ucpi->c_cgx); - UFSD(("EXIT (FAILED)\n")) + + for (end = result + 8; result < end; result += uspi->s_fpb) { + blockmap = ubh_blkmap(UCPI_UBH(ucpi), ucpi->c_freeoff, result); + blockmap <<= 1; + mask = mask_arr[count]; + want = want_arr[count]; + for (pos = 0; pos <= uspi->s_fpb - count; pos++) { + if ((blockmap & mask) == want) { + UFSD("EXIT, result %u\n", result); + return result + pos; + } + mask <<= 1; + want <<= 1; + } + } + + ufs_error(sb, "ufs_bitmap_search", "block not in map on cg %u\n", + ucpi->c_cgx); + UFSD("EXIT (FAILED)\n"); return (unsigned)-1; } @@ -734,9 +838,9 @@ static void ufs_clusteracct(struct super_block * sb, return; if (cnt > 0) - ubh_setbit(UCPI_UBH, ucpi->c_clusteroff, blkno); + ubh_setbit(UCPI_UBH(ucpi), ucpi->c_clusteroff, blkno); else - ubh_clrbit(UCPI_UBH, ucpi->c_clusteroff, blkno); + ubh_clrbit(UCPI_UBH(ucpi), ucpi->c_clusteroff, blkno); /* * Find the size of the cluster going forward. @@ -745,7 +849,7 @@ static void ufs_clusteracct(struct super_block * sb, end = start + uspi->s_contigsumsize; if ( end >= ucpi->c_nclusterblks) end = ucpi->c_nclusterblks; - i = ubh_find_next_zero_bit (UCPI_UBH, ucpi->c_clusteroff, end, start); + i = ubh_find_next_zero_bit (UCPI_UBH(ucpi), ucpi->c_clusteroff, end, start); if (i > end) i = end; forw = i - start; @@ -757,7 +861,7 @@ static void ufs_clusteracct(struct super_block * sb, end = start - uspi->s_contigsumsize; if (end < 0 ) end = -1; - i = ubh_find_last_zero_bit (UCPI_UBH, ucpi->c_clusteroff, start, end); + i = ubh_find_last_zero_bit (UCPI_UBH(ucpi), ucpi->c_clusteroff, start, end); if ( i < end) i = end; back = start - i; @@ -769,11 +873,11 @@ static void ufs_clusteracct(struct super_block * sb, i = back + forw + 1; if (i > uspi->s_contigsumsize) i = uspi->s_contigsumsize; - fs32_add(sb, (__fs32*)ubh_get_addr(UCPI_UBH, ucpi->c_clustersumoff + (i << 2)), cnt); + fs32_add(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (i << 2)), cnt); if (back > 0) - fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH, ucpi->c_clustersumoff + (back << 2)), cnt); + fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (back << 2)), cnt); if (forw > 0) - fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH, ucpi->c_clustersumoff + (forw << 2)), cnt); + fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (forw << 2)), cnt); } diff --git a/fs/ufs/cylinder.c b/fs/ufs/cylinder.c index 14abb8b835f7..09c39e5e6386 100644 --- a/fs/ufs/cylinder.c +++ b/fs/ufs/cylinder.c @@ -20,15 +20,6 @@ #include "swab.h" #include "util.h" -#undef UFS_CYLINDER_DEBUG - -#ifdef UFS_CYLINDER_DEBUG -#define UFSD(x) printk("(%s, %d), %s:", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - - /* * Read cylinder group into cache. The memory space for ufs_cg_private_info * structure is already allocated during ufs_read_super. @@ -42,19 +33,19 @@ static void ufs_read_cylinder (struct super_block * sb, struct ufs_cylinder_group * ucg; unsigned i, j; - UFSD(("ENTER, cgno %u, bitmap_nr %u\n", cgno, bitmap_nr)) + UFSD("ENTER, cgno %u, bitmap_nr %u\n", cgno, bitmap_nr); uspi = sbi->s_uspi; ucpi = sbi->s_ucpi[bitmap_nr]; ucg = (struct ufs_cylinder_group *)sbi->s_ucg[cgno]->b_data; - UCPI_UBH->fragment = ufs_cgcmin(cgno); - UCPI_UBH->count = uspi->s_cgsize >> sb->s_blocksize_bits; + UCPI_UBH(ucpi)->fragment = ufs_cgcmin(cgno); + UCPI_UBH(ucpi)->count = uspi->s_cgsize >> sb->s_blocksize_bits; /* * We have already the first fragment of cylinder group block in buffer */ - UCPI_UBH->bh[0] = sbi->s_ucg[cgno]; - for (i = 1; i < UCPI_UBH->count; i++) - if (!(UCPI_UBH->bh[i] = sb_bread(sb, UCPI_UBH->fragment + i))) + UCPI_UBH(ucpi)->bh[0] = sbi->s_ucg[cgno]; + for (i = 1; i < UCPI_UBH(ucpi)->count; i++) + if (!(UCPI_UBH(ucpi)->bh[i] = sb_bread(sb, UCPI_UBH(ucpi)->fragment + i))) goto failed; sbi->s_cgno[bitmap_nr] = cgno; @@ -73,7 +64,7 @@ static void ufs_read_cylinder (struct super_block * sb, ucpi->c_clustersumoff = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_clustersumoff); ucpi->c_clusteroff = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_clusteroff); ucpi->c_nclusterblks = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_nclusterblks); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return; failed: @@ -95,15 +86,15 @@ void ufs_put_cylinder (struct super_block * sb, unsigned bitmap_nr) struct ufs_cylinder_group * ucg; unsigned i; - UFSD(("ENTER, bitmap_nr %u\n", bitmap_nr)) + UFSD("ENTER, bitmap_nr %u\n", bitmap_nr); uspi = sbi->s_uspi; if (sbi->s_cgno[bitmap_nr] == UFS_CGNO_EMPTY) { - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return; } ucpi = sbi->s_ucpi[bitmap_nr]; - ucg = ubh_get_ucg(UCPI_UBH); + ucg = ubh_get_ucg(UCPI_UBH(ucpi)); if (uspi->s_ncg > UFS_MAX_GROUP_LOADED && bitmap_nr >= sbi->s_cg_loaded) { ufs_panic (sb, "ufs_put_cylinder", "internal error"); @@ -116,13 +107,13 @@ void ufs_put_cylinder (struct super_block * sb, unsigned bitmap_nr) ucg->cg_rotor = cpu_to_fs32(sb, ucpi->c_rotor); ucg->cg_frotor = cpu_to_fs32(sb, ucpi->c_frotor); ucg->cg_irotor = cpu_to_fs32(sb, ucpi->c_irotor); - ubh_mark_buffer_dirty (UCPI_UBH); - for (i = 1; i < UCPI_UBH->count; i++) { - brelse (UCPI_UBH->bh[i]); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); + for (i = 1; i < UCPI_UBH(ucpi)->count; i++) { + brelse (UCPI_UBH(ucpi)->bh[i]); } sbi->s_cgno[bitmap_nr] = UFS_CGNO_EMPTY; - UFSD(("EXIT\n")) + UFSD("EXIT\n"); } /* @@ -139,7 +130,7 @@ struct ufs_cg_private_info * ufs_load_cylinder ( struct ufs_cg_private_info * ucpi; unsigned cg, i, j; - UFSD(("ENTER, cgno %u\n", cgno)) + UFSD("ENTER, cgno %u\n", cgno); uspi = sbi->s_uspi; if (cgno >= uspi->s_ncg) { @@ -150,7 +141,7 @@ struct ufs_cg_private_info * ufs_load_cylinder ( * Cylinder group number cg it in cache and it was last used */ if (sbi->s_cgno[0] == cgno) { - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return sbi->s_ucpi[0]; } /* @@ -160,16 +151,16 @@ struct ufs_cg_private_info * ufs_load_cylinder ( if (sbi->s_cgno[cgno] != UFS_CGNO_EMPTY) { if (sbi->s_cgno[cgno] != cgno) { ufs_panic (sb, "ufs_load_cylinder", "internal error, wrong number of cg in cache"); - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return NULL; } else { - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return sbi->s_ucpi[cgno]; } } else { ufs_read_cylinder (sb, cgno, cgno); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return sbi->s_ucpi[cgno]; } } @@ -204,6 +195,6 @@ struct ufs_cg_private_info * ufs_load_cylinder ( sbi->s_ucpi[0] = ucpi; ufs_read_cylinder (sb, cgno, 0); } - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return sbi->s_ucpi[0]; } diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 1a561202d3f4..7f0a0aa63584 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -11,31 +11,20 @@ * 4.4BSD (FreeBSD) support added on February 1st 1998 by * Niels Kristian Bech Jensen <nkbj@image.dk> partially based * on code by Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>. + * + * Migration to usage of "page cache" on May 2006 by + * Evgeniy Dushistov <dushistov@mail.ru> based on ext2 code base. */ #include <linux/time.h> #include <linux/fs.h> #include <linux/ufs_fs.h> #include <linux/smp_lock.h> -#include <linux/buffer_head.h> #include <linux/sched.h> #include "swab.h" #include "util.h" -#undef UFS_DIR_DEBUG - -#ifdef UFS_DIR_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - -static int -ufs_check_dir_entry (const char *, struct inode *, struct ufs_dir_entry *, - struct buffer_head *, unsigned long); - - /* * NOTE! unlike strncmp, ufs_match returns 1 for success, 0 for failure. * @@ -51,495 +40,541 @@ static inline int ufs_match(struct super_block *sb, int len, return !memcmp(name, de->d_name, len); } -/* - * This is blatantly stolen from ext2fs - */ -static int -ufs_readdir (struct file * filp, void * dirent, filldir_t filldir) +static int ufs_commit_chunk(struct page *page, unsigned from, unsigned to) { - struct inode *inode = filp->f_dentry->d_inode; - int error = 0; - unsigned long offset, lblk; - int i, stored; - struct buffer_head * bh; - struct ufs_dir_entry * de; - struct super_block * sb; - int de_reclen; - unsigned flags; - u64 blk= 0L; - - lock_kernel(); - - sb = inode->i_sb; - flags = UFS_SB(sb)->s_flags; - - UFSD(("ENTER, ino %lu f_pos %lu\n", inode->i_ino, (unsigned long) filp->f_pos)) - - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); - - while (!error && !stored && filp->f_pos < inode->i_size) { - lblk = (filp->f_pos) >> sb->s_blocksize_bits; - blk = ufs_frag_map(inode, lblk); - if (!blk || !(bh = sb_bread(sb, blk))) { - /* XXX - error - skip to the next block */ - printk("ufs_readdir: " - "dir inode %lu has a hole at offset %lu\n", - inode->i_ino, (unsigned long int)filp->f_pos); - filp->f_pos += sb->s_blocksize - offset; - continue; - } - -revalidate: - /* If the dir block has changed since the last call to - * readdir(2), then we might be pointing to an invalid - * dirent right now. Scan from the start of the block - * to make sure. */ - if (filp->f_version != inode->i_version) { - for (i = 0; i < sb->s_blocksize && i < offset; ) { - de = (struct ufs_dir_entry *)(bh->b_data + i); - /* It's too expensive to do a full - * dirent test each time round this - * loop, but we do have to test at - * least that it is non-zero. A - * failure will be detected in the - * dirent test below. */ - de_reclen = fs16_to_cpu(sb, de->d_reclen); - if (de_reclen < 1) - break; - i += de_reclen; - } - offset = i; - filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) - | offset; - filp->f_version = inode->i_version; - } + struct inode *dir = page->mapping->host; + int err = 0; + dir->i_version++; + page->mapping->a_ops->commit_write(NULL, page, from, to); + if (IS_DIRSYNC(dir)) + err = write_one_page(page, 1); + else + unlock_page(page); + return err; +} - while (!error && filp->f_pos < inode->i_size - && offset < sb->s_blocksize) { - de = (struct ufs_dir_entry *) (bh->b_data + offset); - /* XXX - put in a real ufs_check_dir_entry() */ - if ((de->d_reclen == 0) || (ufs_get_de_namlen(sb, de) == 0)) { - filp->f_pos = (filp->f_pos & - (sb->s_blocksize - 1)) + - sb->s_blocksize; - brelse(bh); - unlock_kernel(); - return stored; - } - if (!ufs_check_dir_entry ("ufs_readdir", inode, de, - bh, offset)) { - /* On error, skip the f_pos to the - next block. */ - filp->f_pos = (filp->f_pos | - (sb->s_blocksize - 1)) + - 1; - brelse (bh); - unlock_kernel(); - return stored; - } - offset += fs16_to_cpu(sb, de->d_reclen); - if (de->d_ino) { - /* We might block in the next section - * if the data destination is - * currently swapped out. So, use a - * version stamp to detect whether or - * not the directory has been modified - * during the copy operation. */ - unsigned long version = filp->f_version; - unsigned char d_type = DT_UNKNOWN; +static inline void ufs_put_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} - UFSD(("filldir(%s,%u)\n", de->d_name, - fs32_to_cpu(sb, de->d_ino))) - UFSD(("namlen %u\n", ufs_get_de_namlen(sb, de))) +static inline unsigned long ufs_dir_pages(struct inode *inode) +{ + return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; +} - if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) - d_type = de->d_u.d_44.d_type; - error = filldir(dirent, de->d_name, - ufs_get_de_namlen(sb, de), filp->f_pos, - fs32_to_cpu(sb, de->d_ino), d_type); - if (error) - break; - if (version != filp->f_version) - goto revalidate; - stored ++; - } - filp->f_pos += fs16_to_cpu(sb, de->d_reclen); - } - offset = 0; - brelse (bh); +ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry) +{ + ino_t res = 0; + struct ufs_dir_entry *de; + struct page *page; + + de = ufs_find_entry(dir, dentry, &page); + if (de) { + res = fs32_to_cpu(dir->i_sb, de->d_ino); + ufs_put_page(page); } - unlock_kernel(); - return 0; + return res; } -/* - * define how far ahead to read directories while searching them. - */ -#define NAMEI_RA_CHUNKS 2 -#define NAMEI_RA_BLOCKS 4 -#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) -#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) -/* - * ufs_find_entry() - * - * finds an entry in the specified directory with the wanted name. It - * returns the cache buffer in which the entry was found, and the entry - * itself (as a parameter - res_bh). It does NOT read the inode of the - * entry - you'll have to do that yourself if you want to. - */ -struct ufs_dir_entry * ufs_find_entry (struct dentry *dentry, - struct buffer_head ** res_bh) +/* Releases the page */ +void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, + struct page *page, struct inode *inode) { - struct super_block * sb; - struct buffer_head * bh_use[NAMEI_RA_SIZE]; - struct buffer_head * bh_read[NAMEI_RA_SIZE]; - unsigned long offset; - int block, toread, i, err; - struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + unsigned from = (char *) de - (char *) page_address(page); + unsigned to = from + fs16_to_cpu(dir->i_sb, de->d_reclen); + int err; - UFSD(("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name, namelen)) - - *res_bh = NULL; - - sb = dir->i_sb; - - if (namelen > UFS_MAXNAMLEN) - return NULL; + lock_page(page); + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + BUG_ON(err); + de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); + ufs_set_de_type(dir->i_sb, de, inode->i_mode); + err = ufs_commit_chunk(page, from, to); + ufs_put_page(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(dir); +} - memset (bh_use, 0, sizeof (bh_use)); - toread = 0; - for (block = 0; block < NAMEI_RA_SIZE; ++block) { - struct buffer_head * bh; - if ((block << sb->s_blocksize_bits) >= dir->i_size) - break; - bh = ufs_getfrag (dir, block, 0, &err); - bh_use[block] = bh; - if (bh && !buffer_uptodate(bh)) - bh_read[toread++] = bh; +static void ufs_check_page(struct page *page) +{ + struct inode *dir = page->mapping->host; + struct super_block *sb = dir->i_sb; + char *kaddr = page_address(page); + unsigned offs, rec_len; + unsigned limit = PAGE_CACHE_SIZE; + struct ufs_dir_entry *p; + char *error; + + if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_CACHE_MASK; + if (limit & (UFS_SECTOR_SIZE - 1)) + goto Ebadsize; + if (!limit) + goto out; } + for (offs = 0; offs <= limit - UFS_DIR_REC_LEN(1); offs += rec_len) { + p = (struct ufs_dir_entry *)(kaddr + offs); + rec_len = fs16_to_cpu(sb, p->d_reclen); + + if (rec_len < UFS_DIR_REC_LEN(1)) + goto Eshort; + if (rec_len & 3) + goto Ealign; + if (rec_len < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, p))) + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(UFS_SECTOR_SIZE-1)) + goto Espan; + if (fs32_to_cpu(sb, p->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg * + UFS_SB(sb)->s_uspi->s_ncg)) + goto Einumber; + } + if (offs != limit) + goto Eend; +out: + SetPageChecked(page); + return; + + /* Too bad, we had an error */ + +Ebadsize: + ufs_error(sb, "ufs_check_page", + "size of directory #%lu is not a multiple of chunk size", + dir->i_ino + ); + goto fail; +Eshort: + error = "rec_len is smaller than minimal"; + goto bad_entry; +Ealign: + error = "unaligned directory entry"; + goto bad_entry; +Enamelen: + error = "rec_len is too small for name_len"; + goto bad_entry; +Espan: + error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "inode out of bounds"; +bad_entry: + ufs_error (sb, "ufs_check_page", "bad entry in directory #%lu: %s - " + "offset=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + rec_len, ufs_get_de_namlen(sb, p)); + goto fail; +Eend: + p = (struct ufs_dir_entry *)(kaddr + offs); + ufs_error (sb, "ext2_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu", + dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs); +fail: + SetPageChecked(page); + SetPageError(page); +} - for (block = 0, offset = 0; offset < dir->i_size; block++) { - struct buffer_head * bh; - struct ufs_dir_entry * de; - char * dlimit; - - if ((block % NAMEI_RA_BLOCKS) == 0 && toread) { - ll_rw_block (READ, toread, bh_read); - toread = 0; - } - bh = bh_use[block % NAMEI_RA_SIZE]; - if (!bh) { - ufs_error (sb, "ufs_find_entry", - "directory #%lu contains a hole at offset %lu", - dir->i_ino, offset); - offset += sb->s_blocksize; - continue; - } - wait_on_buffer (bh); - if (!buffer_uptodate(bh)) { - /* - * read error: all bets are off - */ - break; - } - - de = (struct ufs_dir_entry *) bh->b_data; - dlimit = bh->b_data + sb->s_blocksize; - while ((char *) de < dlimit && offset < dir->i_size) { - /* this code is executed quadratically often */ - /* do minimal checking by hand */ - int de_len; - - if ((char *) de + namelen <= dlimit && - ufs_match(sb, namelen, name, de)) { - /* found a match - - just to be sure, do a full check */ - if (!ufs_check_dir_entry("ufs_find_entry", - dir, de, bh, offset)) - goto failed; - for (i = 0; i < NAMEI_RA_SIZE; ++i) { - if (bh_use[i] != bh) - brelse (bh_use[i]); - } - *res_bh = bh; - return de; - } - /* prevent looping on a bad block */ - de_len = fs16_to_cpu(sb, de->d_reclen); - if (de_len <= 0) - goto failed; - offset += de_len; - de = (struct ufs_dir_entry *) ((char *) de + de_len); - } - - brelse (bh); - if (((block + NAMEI_RA_SIZE) << sb->s_blocksize_bits ) >= - dir->i_size) - bh = NULL; - else - bh = ufs_getfrag (dir, block + NAMEI_RA_SIZE, 0, &err); - bh_use[block % NAMEI_RA_SIZE] = bh; - if (bh && !buffer_uptodate(bh)) - bh_read[toread++] = bh; +static struct page *ufs_get_page(struct inode *dir, unsigned long n) +{ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_cache_page(mapping, n, + (filler_t*)mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page_locked(page); + kmap(page); + if (!PageUptodate(page)) + goto fail; + if (!PageChecked(page)) + ufs_check_page(page); + if (PageError(page)) + goto fail; } + return page; -failed: - for (i = 0; i < NAMEI_RA_SIZE; ++i) brelse (bh_use[i]); - UFSD(("EXIT\n")) - return NULL; +fail: + ufs_put_page(page); + return ERR_PTR(-EIO); } -static int -ufs_check_dir_entry (const char *function, struct inode *dir, - struct ufs_dir_entry *de, struct buffer_head *bh, - unsigned long offset) +/* + * Return the offset into page `page_nr' of the last valid + * byte in that page, plus one. + */ +static unsigned +ufs_last_byte(struct inode *inode, unsigned long page_nr) { - struct super_block *sb = dir->i_sb; - const char *error_msg = NULL; - int rlen = fs16_to_cpu(sb, de->d_reclen); - - if (rlen < UFS_DIR_REC_LEN(1)) - error_msg = "reclen is smaller than minimal"; - else if (rlen % 4 != 0) - error_msg = "reclen % 4 != 0"; - else if (rlen < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de))) - error_msg = "reclen is too small for namlen"; - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) - error_msg = "directory entry across blocks"; - else if (fs32_to_cpu(sb, de->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg * - UFS_SB(sb)->s_uspi->s_ncg)) - error_msg = "inode out of bounds"; - - if (error_msg != NULL) - ufs_error (sb, function, "bad entry in directory #%lu, size %Lu: %s - " - "offset=%lu, inode=%lu, reclen=%d, namlen=%d", - dir->i_ino, dir->i_size, error_msg, offset, - (unsigned long)fs32_to_cpu(sb, de->d_ino), - rlen, ufs_get_de_namlen(sb, de)); - - return (error_msg == NULL ? 1 : 0); + unsigned last_byte = inode->i_size; + + last_byte -= page_nr << PAGE_CACHE_SHIFT; + if (last_byte > PAGE_CACHE_SIZE) + last_byte = PAGE_CACHE_SIZE; + return last_byte; } -struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct buffer_head **p) +static inline struct ufs_dir_entry * +ufs_next_entry(struct super_block *sb, struct ufs_dir_entry *p) { - int err; - struct buffer_head *bh = ufs_bread (dir, 0, 0, &err); - struct ufs_dir_entry *res = NULL; - - if (bh) { - res = (struct ufs_dir_entry *) bh->b_data; - res = (struct ufs_dir_entry *)((char *)res + - fs16_to_cpu(dir->i_sb, res->d_reclen)); - } - *p = bh; - return res; + return (struct ufs_dir_entry *)((char *)p + + fs16_to_cpu(sb, p->d_reclen)); } -ino_t ufs_inode_by_name(struct inode * dir, struct dentry *dentry) + +struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p) { - ino_t res = 0; - struct ufs_dir_entry * de; - struct buffer_head *bh; + struct page *page = ufs_get_page(dir, 0); + struct ufs_dir_entry *de = NULL; - de = ufs_find_entry (dentry, &bh); - if (de) { - res = fs32_to_cpu(dir->i_sb, de->d_ino); - brelse(bh); + if (!IS_ERR(page)) { + de = ufs_next_entry(dir->i_sb, + (struct ufs_dir_entry *)page_address(page)); + *p = page; } - return res; + return de; } -void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, - struct buffer_head *bh, struct inode *inode) +/* + * ufs_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the page in which the entry was found, and the entry itself + * (as a parameter - res_dir). Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry, + struct page **res_page) { - dir->i_version++; - de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); - mark_buffer_dirty(bh); - if (IS_DIRSYNC(dir)) - sync_dirty_buffer(bh); - brelse (bh); + struct super_block *sb = dir->i_sb; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned reclen = UFS_DIR_REC_LEN(namelen); + unsigned long start, n; + unsigned long npages = ufs_dir_pages(dir); + struct page *page = NULL; + struct ufs_inode_info *ui = UFS_I(dir); + struct ufs_dir_entry *de; + + UFSD("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name, namelen); + + if (npages == 0 || namelen > UFS_MAXNAMLEN) + goto out; + + /* OFFSET_CACHE */ + *res_page = NULL; + + start = ui->i_dir_start_lookup; + + if (start >= npages) + start = 0; + n = start; + do { + char *kaddr; + page = ufs_get_page(dir, n); + if (!IS_ERR(page)) { + kaddr = page_address(page); + de = (struct ufs_dir_entry *) kaddr; + kaddr += ufs_last_byte(dir, n) - reclen; + while ((char *) de <= kaddr) { + if (de->d_reclen == 0) { + ufs_error(dir->i_sb, __FUNCTION__, + "zero-length directory entry"); + ufs_put_page(page); + goto out; + } + if (ufs_match(sb, namelen, name, de)) + goto found; + de = ufs_next_entry(sb, de); + } + ufs_put_page(page); + } + if (++n >= npages) + n = 0; + } while (n != start); +out: + return NULL; + +found: + *res_page = page; + ui->i_dir_start_lookup = n; + return de; } /* - * ufs_add_entry() - * - * adds a file entry to the specified directory, using the same - * semantics as ufs_find_entry(). It returns NULL if it failed. + * Parent is locked. */ int ufs_add_link(struct dentry *dentry, struct inode *inode) { - struct super_block * sb; - struct ufs_sb_private_info * uspi; - unsigned long offset; - unsigned fragoff; - unsigned short rec_len; - struct buffer_head * bh; - struct ufs_dir_entry * de, * de1; struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; + struct super_block *sb = dir->i_sb; + unsigned reclen = UFS_DIR_REC_LEN(namelen); + unsigned short rec_len, name_len; + struct page *page = NULL; + struct ufs_dir_entry *de; + unsigned long npages = ufs_dir_pages(dir); + unsigned long n; + char *kaddr; + unsigned from, to; int err; - UFSD(("ENTER, name %s, namelen %u\n", name, namelen)) - - sb = dir->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - if (!namelen) - return -EINVAL; - bh = ufs_bread (dir, 0, 0, &err); - if (!bh) - return err; - rec_len = UFS_DIR_REC_LEN(namelen); - offset = 0; - de = (struct ufs_dir_entry *) bh->b_data; - while (1) { - if ((char *)de >= UFS_SECTOR_SIZE + bh->b_data) { - fragoff = offset & ~uspi->s_fmask; - if (fragoff != 0 && fragoff != UFS_SECTOR_SIZE) - ufs_error (sb, "ufs_add_entry", "internal error" - " fragoff %u", fragoff); - if (!fragoff) { - brelse (bh); - bh = ufs_bread (dir, offset >> sb->s_blocksize_bits, 1, &err); - if (!bh) - return err; - } - if (dir->i_size <= offset) { - if (dir->i_size == 0) { - brelse(bh); - return -ENOENT; - } - de = (struct ufs_dir_entry *) (bh->b_data + fragoff); - de->d_ino = 0; + UFSD("ENTER, name %s, namelen %u\n", name, namelen); + + /* + * We take care of directory expansion in the same loop. + * This code plays outside i_size, so it locks the page + * to protect that region. + */ + for (n = 0; n <= npages; n++) { + char *dir_end; + + page = ufs_get_page(dir, n); + err = PTR_ERR(page); + if (IS_ERR(page)) + goto out; + lock_page(page); + kaddr = page_address(page); + dir_end = kaddr + ufs_last_byte(dir, n); + de = (struct ufs_dir_entry *)kaddr; + kaddr += PAGE_CACHE_SIZE - reclen; + while ((char *)de <= kaddr) { + if ((char *)de == dir_end) { + /* We hit i_size */ + name_len = 0; + rec_len = UFS_SECTOR_SIZE; de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE); - ufs_set_de_namlen(sb, de, 0); - dir->i_size = offset + UFS_SECTOR_SIZE; - mark_inode_dirty(dir); - } else { - de = (struct ufs_dir_entry *) bh->b_data; + de->d_ino = 0; + goto got_it; } + if (de->d_reclen == 0) { + ufs_error(dir->i_sb, __FUNCTION__, + "zero-length directory entry"); + err = -EIO; + goto out_unlock; + } + err = -EEXIST; + if (ufs_match(sb, namelen, name, de)) + goto out_unlock; + name_len = UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de)); + rec_len = fs16_to_cpu(sb, de->d_reclen); + if (!de->d_ino && rec_len >= reclen) + goto got_it; + if (rec_len >= name_len + reclen) + goto got_it; + de = (struct ufs_dir_entry *) ((char *) de + rec_len); } - if (!ufs_check_dir_entry ("ufs_add_entry", dir, de, bh, offset)) { - brelse (bh); - return -ENOENT; - } - if (ufs_match(sb, namelen, name, de)) { - brelse (bh); - return -EEXIST; - } - if (de->d_ino == 0 && fs16_to_cpu(sb, de->d_reclen) >= rec_len) - break; - - if (fs16_to_cpu(sb, de->d_reclen) >= - UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de)) + rec_len) - break; - offset += fs16_to_cpu(sb, de->d_reclen); - de = (struct ufs_dir_entry *) ((char *) de + fs16_to_cpu(sb, de->d_reclen)); + unlock_page(page); + ufs_put_page(page); } - + BUG(); + return -EINVAL; + +got_it: + from = (char*)de - (char*)page_address(page); + to = from + rec_len; + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + goto out_unlock; if (de->d_ino) { - de1 = (struct ufs_dir_entry *) ((char *) de + - UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de))); - de1->d_reclen = - cpu_to_fs16(sb, fs16_to_cpu(sb, de->d_reclen) - - UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de))); - de->d_reclen = - cpu_to_fs16(sb, UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de))); + struct ufs_dir_entry *de1 = + (struct ufs_dir_entry *) ((char *) de + name_len); + de1->d_reclen = cpu_to_fs16(sb, rec_len - name_len); + de->d_reclen = cpu_to_fs16(sb, name_len); + de = de1; } - de->d_ino = 0; + ufs_set_de_namlen(sb, de, namelen); - memcpy (de->d_name, name, namelen + 1); + memcpy(de->d_name, name, namelen + 1); de->d_ino = cpu_to_fs32(sb, inode->i_ino); ufs_set_de_type(sb, de, inode->i_mode); - mark_buffer_dirty(bh); - if (IS_DIRSYNC(dir)) - sync_dirty_buffer(bh); - brelse (bh); + + err = ufs_commit_chunk(page, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - dir->i_version++; + mark_inode_dirty(dir); + /* OFFSET_CACHE */ +out_put: + ufs_put_page(page); +out: + return err; +out_unlock: + unlock_page(page); + goto out_put; +} - UFSD(("EXIT\n")) +static inline unsigned +ufs_validate_entry(struct super_block *sb, char *base, + unsigned offset, unsigned mask) +{ + struct ufs_dir_entry *de = (struct ufs_dir_entry*)(base + offset); + struct ufs_dir_entry *p = (struct ufs_dir_entry*)(base + (offset&mask)); + while ((char*)p < (char*)de) { + if (p->d_reclen == 0) + break; + p = ufs_next_entry(sb, p); + } + return (char *)p - base; +} + + +/* + * This is blatantly stolen from ext2fs + */ +static int +ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + loff_t pos = filp->f_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block *sb = inode->i_sb; + unsigned int offset = pos & ~PAGE_CACHE_MASK; + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned long npages = ufs_dir_pages(inode); + unsigned chunk_mask = ~(UFS_SECTOR_SIZE - 1); + int need_revalidate = filp->f_version != inode->i_version; + unsigned flags = UFS_SB(sb)->s_flags; + + UFSD("BEGIN\n"); + + if (pos > inode->i_size - UFS_DIR_REC_LEN(1)) + return 0; + + for ( ; n < npages; n++, offset = 0) { + char *kaddr, *limit; + struct ufs_dir_entry *de; + + struct page *page = ufs_get_page(inode, n); + + if (IS_ERR(page)) { + ufs_error(sb, __FUNCTION__, + "bad page in #%lu", + inode->i_ino); + filp->f_pos += PAGE_CACHE_SIZE - offset; + return -EIO; + } + kaddr = page_address(page); + if (unlikely(need_revalidate)) { + if (offset) { + offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); + filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; + } + filp->f_version = inode->i_version; + need_revalidate = 0; + } + de = (struct ufs_dir_entry *)(kaddr+offset); + limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1); + for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) { + if (de->d_reclen == 0) { + ufs_error(sb, __FUNCTION__, + "zero-length directory entry"); + ufs_put_page(page); + return -EIO; + } + if (de->d_ino) { + int over; + unsigned char d_type = DT_UNKNOWN; + + offset = (char *)de - kaddr; + + UFSD("filldir(%s,%u)\n", de->d_name, + fs32_to_cpu(sb, de->d_ino)); + UFSD("namlen %u\n", ufs_get_de_namlen(sb, de)); + + if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) + d_type = de->d_u.d_44.d_type; + + over = filldir(dirent, de->d_name, + ufs_get_de_namlen(sb, de), + (n<<PAGE_CACHE_SHIFT) | offset, + fs32_to_cpu(sb, de->d_ino), d_type); + if (over) { + ufs_put_page(page); + return 0; + } + } + filp->f_pos += fs16_to_cpu(sb, de->d_reclen); + } + ufs_put_page(page); + } return 0; } + /* * ufs_delete_entry deletes a directory entry by merging it with the * previous entry. */ -int ufs_delete_entry (struct inode * inode, struct ufs_dir_entry * dir, - struct buffer_head * bh ) - +int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, + struct page * page) { - struct super_block * sb; - struct ufs_dir_entry * de, * pde; - unsigned i; - - UFSD(("ENTER\n")) + struct super_block *sb = inode->i_sb; + struct address_space *mapping = page->mapping; + char *kaddr = page_address(page); + unsigned from = ((char*)dir - kaddr) & ~(UFS_SECTOR_SIZE - 1); + unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); + struct ufs_dir_entry *pde = NULL; + struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from); + int err; - sb = inode->i_sb; - i = 0; - pde = NULL; - de = (struct ufs_dir_entry *) bh->b_data; - - UFSD(("ino %u, reclen %u, namlen %u, name %s\n", - fs32_to_cpu(sb, de->d_ino), - fs16_to_cpu(sb, de->d_reclen), - ufs_get_de_namlen(sb, de), de->d_name)) - - while (i < bh->b_size) { - if (!ufs_check_dir_entry ("ufs_delete_entry", inode, de, bh, i)) { - brelse(bh); - return -EIO; - } - if (de == dir) { - if (pde) - fs16_add(sb, &pde->d_reclen, - fs16_to_cpu(sb, dir->d_reclen)); - dir->d_ino = 0; - inode->i_version++; - inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - mark_buffer_dirty(bh); - if (IS_DIRSYNC(inode)) - sync_dirty_buffer(bh); - brelse(bh); - UFSD(("EXIT\n")) - return 0; + UFSD("ENTER\n"); + + UFSD("ino %u, reclen %u, namlen %u, name %s\n", + fs32_to_cpu(sb, de->d_ino), + fs16_to_cpu(sb, de->d_reclen), + ufs_get_de_namlen(sb, de), de->d_name); + + while ((char*)de < (char*)dir) { + if (de->d_reclen == 0) { + ufs_error(inode->i_sb, __FUNCTION__, + "zero-length directory entry"); + err = -EIO; + goto out; } - i += fs16_to_cpu(sb, de->d_reclen); - if (i == UFS_SECTOR_SIZE) pde = NULL; - else pde = de; - de = (struct ufs_dir_entry *) - ((char *) de + fs16_to_cpu(sb, de->d_reclen)); - if (i == UFS_SECTOR_SIZE && de->d_reclen == 0) - break; + pde = de; + de = ufs_next_entry(sb, de); } - UFSD(("EXIT\n")) - brelse(bh); - return -ENOENT; + if (pde) + from = (char*)pde - (char*)page_address(page); + lock_page(page); + err = mapping->a_ops->prepare_write(NULL, page, from, to); + BUG_ON(err); + if (pde) + pde->d_reclen = cpu_to_fs16(sb, to-from); + dir->d_ino = 0; + err = ufs_commit_chunk(page, from, to); + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); +out: + ufs_put_page(page); + UFSD("EXIT\n"); + return err; } int ufs_make_empty(struct inode * inode, struct inode *dir) { struct super_block * sb = dir->i_sb; - struct buffer_head * dir_block; + struct address_space *mapping = inode->i_mapping; + struct page *page = grab_cache_page(mapping, 0); struct ufs_dir_entry * de; + char *base; int err; - dir_block = ufs_bread (inode, 0, 1, &err); - if (!dir_block) - return err; + if (!page) + return -ENOMEM; + kmap(page); + err = mapping->a_ops->prepare_write(NULL, page, 0, UFS_SECTOR_SIZE); + if (err) { + unlock_page(page); + goto fail; + } + + + base = (char*)page_address(page); + memset(base, 0, PAGE_CACHE_SIZE); + + de = (struct ufs_dir_entry *) base; - inode->i_blocks = sb->s_blocksize / UFS_SECTOR_SIZE; - de = (struct ufs_dir_entry *) dir_block->b_data; de->d_ino = cpu_to_fs32(sb, inode->i_ino); ufs_set_de_type(sb, de, inode->i_mode); ufs_set_de_namlen(sb, de, 1); @@ -552,72 +587,65 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE - UFS_DIR_REC_LEN(1)); ufs_set_de_namlen(sb, de, 2); strcpy (de->d_name, ".."); - mark_buffer_dirty(dir_block); - brelse (dir_block); - mark_inode_dirty(inode); - return 0; + + err = ufs_commit_chunk(page, 0, UFS_SECTOR_SIZE); +fail: + kunmap(page); + page_cache_release(page); + return err; } /* * routine to check that the specified directory is empty (for rmdir) */ -int ufs_empty_dir (struct inode * inode) +int ufs_empty_dir(struct inode * inode) { - struct super_block * sb; - unsigned long offset; - struct buffer_head * bh; - struct ufs_dir_entry * de, * de1; - int err; - - sb = inode->i_sb; - - if (inode->i_size < UFS_DIR_REC_LEN(1) + UFS_DIR_REC_LEN(2) || - !(bh = ufs_bread (inode, 0, 0, &err))) { - ufs_warning (inode->i_sb, "empty_dir", - "bad directory (dir #%lu) - no data block", - inode->i_ino); - return 1; - } - de = (struct ufs_dir_entry *) bh->b_data; - de1 = (struct ufs_dir_entry *) - ((char *)de + fs16_to_cpu(sb, de->d_reclen)); - if (fs32_to_cpu(sb, de->d_ino) != inode->i_ino || de1->d_ino == 0 || - strcmp (".", de->d_name) || strcmp ("..", de1->d_name)) { - ufs_warning (inode->i_sb, "empty_dir", - "bad directory (dir #%lu) - no `.' or `..'", - inode->i_ino); - return 1; - } - offset = fs16_to_cpu(sb, de->d_reclen) + fs16_to_cpu(sb, de1->d_reclen); - de = (struct ufs_dir_entry *) - ((char *)de1 + fs16_to_cpu(sb, de1->d_reclen)); - while (offset < inode->i_size ) { - if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) { - brelse (bh); - bh = ufs_bread (inode, offset >> sb->s_blocksize_bits, 1, &err); - if (!bh) { - ufs_error (sb, "empty_dir", - "directory #%lu contains a hole at offset %lu", - inode->i_ino, offset); - offset += sb->s_blocksize; - continue; + struct super_block *sb = inode->i_sb; + struct page *page = NULL; + unsigned long i, npages = ufs_dir_pages(inode); + + for (i = 0; i < npages; i++) { + char *kaddr; + struct ufs_dir_entry *de; + page = ufs_get_page(inode, i); + + if (IS_ERR(page)) + continue; + + kaddr = page_address(page); + de = (struct ufs_dir_entry *)kaddr; + kaddr += ufs_last_byte(inode, i) - UFS_DIR_REC_LEN(1); + + while ((char *)de <= kaddr) { + if (de->d_reclen == 0) { + ufs_error(inode->i_sb, __FUNCTION__, + "zero-length directory entry: " + "kaddr=%p, de=%p\n", kaddr, de); + goto not_empty; } - de = (struct ufs_dir_entry *) bh->b_data; - } - if (!ufs_check_dir_entry ("empty_dir", inode, de, bh, offset)) { - brelse (bh); - return 1; - } - if (de->d_ino) { - brelse (bh); - return 0; + if (de->d_ino) { + u16 namelen=ufs_get_de_namlen(sb, de); + /* check for . and .. */ + if (de->d_name[0] != '.') + goto not_empty; + if (namelen > 2) + goto not_empty; + if (namelen < 2) { + if (inode->i_ino != + fs32_to_cpu(sb, de->d_ino)) + goto not_empty; + } else if (de->d_name[1] != '.') + goto not_empty; + } + de = ufs_next_entry(sb, de); } - offset += fs16_to_cpu(sb, de->d_reclen); - de = (struct ufs_dir_entry *) - ((char *)de + fs16_to_cpu(sb, de->d_reclen)); + ufs_put_page(page); } - brelse (bh); return 1; + +not_empty: + ufs_put_page(page); + return 0; } const struct file_operations ufs_dir_operations = { diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 312fd3f86313..0e5001512a9d 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -25,6 +25,26 @@ #include <linux/fs.h> #include <linux/ufs_fs.h> +#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ + +static int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int err; + int ret; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return ret; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return ret; + + err = ufs_sync_inode(inode); + if (ret == 0) + ret = err; + return ret; +} + /* * We have mostly NULL's here: the current defaults are ok for @@ -37,6 +57,7 @@ const struct file_operations ufs_file_operations = { .write = generic_file_write, .mmap = generic_file_mmap, .open = generic_file_open, + .fsync = ufs_sync_file, .sendfile = generic_file_sendfile, }; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index c7a47ed4f430..9501dcd3b213 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -34,14 +34,6 @@ #include "swab.h" #include "util.h" -#undef UFS_IALLOC_DEBUG - -#ifdef UFS_IALLOC_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - /* * NOTE! When we get the inode, we're the only people * that have access to it, and as such there are no @@ -68,7 +60,7 @@ void ufs_free_inode (struct inode * inode) int is_directory; unsigned ino, cg, bit; - UFSD(("ENTER, ino %lu\n", inode->i_ino)) + UFSD("ENTER, ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -91,7 +83,7 @@ void ufs_free_inode (struct inode * inode) unlock_super (sb); return; } - ucg = ubh_get_ucg(UCPI_UBH); + ucg = ubh_get_ucg(UCPI_UBH(ucpi)); if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_free_fragments", "internal error, bad cg magic number"); @@ -104,33 +96,33 @@ void ufs_free_inode (struct inode * inode) clear_inode (inode); - if (ubh_isclr (UCPI_UBH, ucpi->c_iusedoff, bit)) + if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); else { - ubh_clrbit (UCPI_UBH, ucpi->c_iusedoff, bit); + ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit); if (ino < ucpi->c_irotor) ucpi->c_irotor = ino; fs32_add(sb, &ucg->cg_cs.cs_nifree, 1); - fs32_add(sb, &usb1->fs_cstotal.cs_nifree, 1); + uspi->cs_total.cs_nifree++; fs32_add(sb, &UFS_SB(sb)->fs_cs(cg).cs_nifree, 1); if (is_directory) { fs32_sub(sb, &ucg->cg_cs.cs_ndir, 1); - fs32_sub(sb, &usb1->fs_cstotal.cs_ndir, 1); + uspi->cs_total.cs_ndir--; fs32_sub(sb, &UFS_SB(sb)->fs_cs(cg).cs_ndir, 1); } } - ubh_mark_buffer_dirty (USPI_UBH); - ubh_mark_buffer_dirty (UCPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); - ubh_wait_on_buffer (UCPI_UBH); + ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); + ubh_wait_on_buffer (UCPI_UBH(ucpi)); } sb->s_dirt = 1; unlock_super (sb); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); } /* @@ -155,7 +147,7 @@ struct inode * ufs_new_inode(struct inode * dir, int mode) unsigned cg, bit, i, j, start; struct ufs_inode_info *ufsi; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -213,43 +205,43 @@ cg_found: ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) goto failed; - ucg = ubh_get_ucg(UCPI_UBH); + ucg = ubh_get_ucg(UCPI_UBH(ucpi)); if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_new_inode", "internal error, bad cg magic number"); start = ucpi->c_irotor; - bit = ubh_find_next_zero_bit (UCPI_UBH, ucpi->c_iusedoff, uspi->s_ipg, start); + bit = ubh_find_next_zero_bit (UCPI_UBH(ucpi), ucpi->c_iusedoff, uspi->s_ipg, start); if (!(bit < uspi->s_ipg)) { - bit = ubh_find_first_zero_bit (UCPI_UBH, ucpi->c_iusedoff, start); + bit = ubh_find_first_zero_bit (UCPI_UBH(ucpi), ucpi->c_iusedoff, start); if (!(bit < start)) { ufs_error (sb, "ufs_new_inode", "cylinder group %u corrupted - error in inode bitmap\n", cg); goto failed; } } - UFSD(("start = %u, bit = %u, ipg = %u\n", start, bit, uspi->s_ipg)) - if (ubh_isclr (UCPI_UBH, ucpi->c_iusedoff, bit)) - ubh_setbit (UCPI_UBH, ucpi->c_iusedoff, bit); + UFSD("start = %u, bit = %u, ipg = %u\n", start, bit, uspi->s_ipg); + if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) + ubh_setbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit); else { ufs_panic (sb, "ufs_new_inode", "internal error"); goto failed; } fs32_sub(sb, &ucg->cg_cs.cs_nifree, 1); - fs32_sub(sb, &usb1->fs_cstotal.cs_nifree, 1); + uspi->cs_total.cs_nifree--; fs32_sub(sb, &sbi->fs_cs(cg).cs_nifree, 1); if (S_ISDIR(mode)) { fs32_add(sb, &ucg->cg_cs.cs_ndir, 1); - fs32_add(sb, &usb1->fs_cstotal.cs_ndir, 1); + uspi->cs_total.cs_ndir++; fs32_add(sb, &sbi->fs_cs(cg).cs_ndir, 1); } - ubh_mark_buffer_dirty (USPI_UBH); - ubh_mark_buffer_dirty (UCPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); + ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); - ubh_wait_on_buffer (UCPI_UBH); + ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); + ubh_wait_on_buffer (UCPI_UBH(ucpi)); } sb->s_dirt = 1; @@ -272,6 +264,7 @@ cg_found: ufsi->i_shadow = 0; ufsi->i_osync = 0; ufsi->i_oeftflag = 0; + ufsi->i_dir_start_lookup = 0; memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1)); insert_inode_hash(inode); @@ -287,14 +280,14 @@ cg_found: return ERR_PTR(-EDQUOT); } - UFSD(("allocating inode %lu\n", inode->i_ino)) - UFSD(("EXIT\n")) + UFSD("allocating inode %lu\n", inode->i_ino); + UFSD("EXIT\n"); return inode; failed: unlock_super (sb); make_bad_inode(inode); iput (inode); - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return ERR_PTR(-ENOSPC); } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 3c3f62ce2ad9..f2dbdf5a8769 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -41,14 +41,7 @@ #include "swab.h" #include "util.h" -#undef UFS_INODE_DEBUG -#undef UFS_INODE_DEBUG_MORE - -#ifdef UFS_INODE_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif +static u64 ufs_frag_map(struct inode *inode, sector_t frag); static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) { @@ -61,7 +54,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off int n = 0; - UFSD(("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks)); + UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks); if (i_block < 0) { ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0"); } else if (i_block < direct_blocks) { @@ -89,7 +82,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off * the begining of the filesystem. */ -u64 ufs_frag_map(struct inode *inode, sector_t frag) +static u64 ufs_frag_map(struct inode *inode, sector_t frag) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -104,8 +97,8 @@ u64 ufs_frag_map(struct inode *inode, sector_t frag) unsigned flags = UFS_SB(sb)->s_flags; u64 temp = 0L; - UFSD((": frag = %llu depth = %d\n", (unsigned long long)frag, depth)); - UFSD((": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask)); + UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth); + UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask); if (depth == 0) return 0; @@ -161,26 +154,64 @@ out: return ret; } -static struct buffer_head * ufs_inode_getfrag (struct inode *inode, - unsigned int fragment, unsigned int new_fragment, - unsigned int required, int *err, int metadata, long *phys, int *new) +static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh) +{ + lock_buffer(bh); + memset(bh->b_data, 0, inode->i_sb->s_blocksize); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + if (IS_SYNC(inode)) + sync_dirty_buffer(bh); +} + +static struct buffer_head * +ufs_clear_frags(struct inode *inode, sector_t beg, + unsigned int n) +{ + struct buffer_head *res, *bh; + sector_t end = beg + n; + + res = sb_getblk(inode->i_sb, beg); + ufs_clear_frag(inode, res); + for (++beg; beg < end; ++beg) { + bh = sb_getblk(inode->i_sb, beg); + ufs_clear_frag(inode, bh); + brelse(bh); + } + return res; +} + +/** + * ufs_inode_getfrag() - allocate new fragment(s) + * @inode - pointer to inode + * @fragment - number of `fragment' which hold pointer + * to new allocated fragment(s) + * @new_fragment - number of new allocated fragment(s) + * @required - how many fragment(s) we require + * @err - we set it if something wrong + * @phys - pointer to where we save physical number of new allocated fragments, + * NULL if we allocate not data(indirect blocks for example). + * @new - we set it if we allocate new block + * @locked_page - for ufs_new_fragments() + */ +static struct buffer_head * +ufs_inode_getfrag(struct inode *inode, unsigned int fragment, + sector_t new_fragment, unsigned int required, int *err, + long *phys, int *new, struct page *locked_page) { struct ufs_inode_info *ufsi = UFS_I(inode); - struct super_block * sb; - struct ufs_sb_private_info * uspi; + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * result; unsigned block, blockoff, lastfrag, lastblock, lastblockoff; unsigned tmp, goal; __fs32 * p, * p2; - unsigned flags = 0; - UFSD(("ENTER, ino %lu, fragment %u, new_fragment %u, required %u\n", - inode->i_ino, fragment, new_fragment, required)) + UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, required %u, " + "metadata %d\n", inode->i_ino, fragment, + (unsigned long long)new_fragment, required, !phys); - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - flags = UFS_SB(sb)->s_flags; /* TODO : to be done for write support if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) goto ufs2; @@ -195,16 +226,16 @@ repeat: tmp = fs32_to_cpu(sb, *p); lastfrag = ufsi->i_lastfrag; if (tmp && fragment < lastfrag) { - if (metadata) { + if (!phys) { result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); if (tmp == fs32_to_cpu(sb, *p)) { - UFSD(("EXIT, result %u\n", tmp + blockoff)) + UFSD("EXIT, result %u\n", tmp + blockoff); return result; } brelse (result); goto repeat; } else { - *phys = tmp; + *phys = tmp + blockoff; return NULL; } } @@ -221,7 +252,8 @@ repeat: if (lastblockoff) { p2 = ufsi->i_u1.i_data + lastblock; tmp = ufs_new_fragments (inode, p2, lastfrag, - fs32_to_cpu(sb, *p2), uspi->s_fpb - lastblockoff, err); + fs32_to_cpu(sb, *p2), uspi->s_fpb - lastblockoff, + err, locked_page); if (!tmp) { if (lastfrag != ufsi->i_lastfrag) goto repeat; @@ -233,14 +265,16 @@ repeat: } goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; tmp = ufs_new_fragments (inode, p, fragment - blockoff, - goal, required + blockoff, err); + goal, required + blockoff, + err, locked_page); } /* * We will extend last allocated block */ else if (lastblock == block) { - tmp = ufs_new_fragments (inode, p, fragment - (blockoff - lastblockoff), - fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), err); + tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), + fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), + err, locked_page); } /* * We will allocate new block before last allocated block @@ -248,8 +282,8 @@ repeat: else /* (lastblock > block) */ { if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) goal = tmp + uspi->s_fpb; - tmp = ufs_new_fragments (inode, p, fragment - blockoff, - goal, uspi->s_fpb, err); + tmp = ufs_new_fragments(inode, p, fragment - blockoff, + goal, uspi->s_fpb, err, locked_page); } if (!tmp) { if ((!blockoff && *p) || @@ -259,14 +293,10 @@ repeat: return NULL; } - /* The nullification of framgents done in ufs/balloc.c is - * something I don't have the stomache to move into here right - * now. -DaveM - */ - if (metadata) { - result = sb_getblk(inode->i_sb, tmp + blockoff); + if (!phys) { + result = ufs_clear_frags(inode, tmp + blockoff, required); } else { - *phys = tmp; + *phys = tmp + blockoff; result = NULL; *err = 0; *new = 1; @@ -276,7 +306,7 @@ repeat: if (IS_SYNC(inode)) ufs_sync_inode (inode); mark_inode_dirty(inode); - UFSD(("EXIT, result %u\n", tmp + blockoff)) + UFSD("EXIT, result %u\n", tmp + blockoff); return result; /* This part : To be implemented .... @@ -295,22 +325,35 @@ repeat2: */ } -static struct buffer_head * ufs_block_getfrag (struct inode *inode, - struct buffer_head *bh, unsigned int fragment, unsigned int new_fragment, - unsigned int blocksize, int * err, int metadata, long *phys, int *new) +/** + * ufs_inode_getblock() - allocate new block + * @inode - pointer to inode + * @bh - pointer to block which hold "pointer" to new allocated block + * @fragment - number of `fragment' which hold pointer + * to new allocated block + * @new_fragment - number of new allocated fragment + * (block will hold this fragment and also uspi->s_fpb-1) + * @err - see ufs_inode_getfrag() + * @phys - see ufs_inode_getfrag() + * @new - see ufs_inode_getfrag() + * @locked_page - see ufs_inode_getfrag() + */ +static struct buffer_head * +ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, + unsigned int fragment, sector_t new_fragment, int *err, + long *phys, int *new, struct page *locked_page) { - struct super_block * sb; - struct ufs_sb_private_info * uspi; + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * result; unsigned tmp, goal, block, blockoff; __fs32 * p; - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; block = ufs_fragstoblks (fragment); blockoff = ufs_fragnum (fragment); - UFSD(("ENTER, ino %lu, fragment %u, new_fragment %u\n", inode->i_ino, fragment, new_fragment)) + UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, metadata %d\n", + inode->i_ino, fragment, (unsigned long long)new_fragment, !phys); result = NULL; if (!bh) @@ -326,14 +369,14 @@ static struct buffer_head * ufs_block_getfrag (struct inode *inode, repeat: tmp = fs32_to_cpu(sb, *p); if (tmp) { - if (metadata) { + if (!phys) { result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); if (tmp == fs32_to_cpu(sb, *p)) goto out; brelse (result); goto repeat; } else { - *phys = tmp; + *phys = tmp + blockoff; goto out; } } @@ -342,21 +385,19 @@ repeat: goal = tmp + uspi->s_fpb; else goal = bh->b_blocknr + uspi->s_fpb; - tmp = ufs_new_fragments (inode, p, ufs_blknum(new_fragment), goal, uspi->s_fpb, err); + tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal, + uspi->s_fpb, err, locked_page); if (!tmp) { if (fs32_to_cpu(sb, *p)) goto repeat; goto out; } - /* The nullification of framgents done in ufs/balloc.c is - * something I don't have the stomache to move into here right - * now. -DaveM - */ - if (metadata) { - result = sb_getblk(sb, tmp + blockoff); + + if (!phys) { + result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb); } else { - *phys = tmp; + *phys = tmp + blockoff; *new = 1; } @@ -365,18 +406,19 @@ repeat: sync_dirty_buffer(bh); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - UFSD(("result %u\n", tmp + blockoff)); + UFSD("result %u\n", tmp + blockoff); out: brelse (bh); - UFSD(("EXIT\n")); + UFSD("EXIT\n"); return result; } -/* - * This function gets the block which contains the fragment. +/** + * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and + * readpage, writepage and so on */ -int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) +int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) { struct super_block * sb = inode->i_sb; struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi; @@ -387,7 +429,7 @@ int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_hea if (!create) { phys64 = ufs_frag_map(inode, fragment); - UFSD(("phys64 = %llu \n",phys64)); + UFSD("phys64 = %llu \n",phys64); if (phys64) map_bh(bh_result, sb, phys64); return 0; @@ -402,7 +444,7 @@ int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_hea lock_kernel(); - UFSD(("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment)) + UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); if (fragment < 0) goto abort_negative; if (fragment > @@ -418,15 +460,15 @@ int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_hea * it much more readable: */ #define GET_INODE_DATABLOCK(x) \ - ufs_inode_getfrag(inode, x, fragment, 1, &err, 0, &phys, &new) + ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page) #define GET_INODE_PTR(x) \ - ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, 1, NULL, NULL) + ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page) #define GET_INDIRECT_DATABLOCK(x) \ - ufs_block_getfrag(inode, bh, x, fragment, sb->s_blocksize, \ - &err, 0, &phys, &new); + ufs_inode_getblock(inode, bh, x, fragment, \ + &err, &phys, &new, bh_result->b_page); #define GET_INDIRECT_PTR(x) \ - ufs_block_getfrag(inode, bh, x, fragment, sb->s_blocksize, \ - &err, 1, NULL, NULL); + ufs_inode_getblock(inode, bh, x, fragment, \ + &err, NULL, NULL, bh_result->b_page); if (ptr < UFS_NDIR_FRAGMENT) { bh = GET_INODE_DATABLOCK(ptr); @@ -474,8 +516,9 @@ abort_too_big: goto abort; } -struct buffer_head *ufs_getfrag(struct inode *inode, unsigned int fragment, - int create, int *err) +static struct buffer_head *ufs_getfrag(struct inode *inode, + unsigned int fragment, + int create, int *err) { struct buffer_head dummy; int error; @@ -502,7 +545,7 @@ struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment, { struct buffer_head * bh; - UFSD(("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment)) + UFSD("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment); bh = ufs_getfrag (inode, fragment, create, err); if (!bh || buffer_uptodate(bh)) return bh; @@ -540,6 +583,28 @@ struct address_space_operations ufs_aops = { .bmap = ufs_bmap }; +static void ufs_set_inode_ops(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ufs_file_inode_operations; + inode->i_fop = &ufs_file_operations; + inode->i_mapping->a_ops = &ufs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ufs_dir_inode_operations; + inode->i_fop = &ufs_dir_operations; + inode->i_mapping->a_ops = &ufs_aops; + } else if (S_ISLNK(inode->i_mode)) { + if (!inode->i_blocks) + inode->i_op = &ufs_fast_symlink_inode_operations; + else { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &ufs_aops; + } + } else + init_special_inode(inode, inode->i_mode, + ufs_get_inode_dev(inode->i_sb, UFS_I(inode))); +} + void ufs_read_inode (struct inode * inode) { struct ufs_inode_info *ufsi = UFS_I(inode); @@ -552,7 +617,7 @@ void ufs_read_inode (struct inode * inode) unsigned i; unsigned flags; - UFSD(("ENTER, ino %lu\n", inode->i_ino)) + UFSD("ENTER, ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -603,38 +668,22 @@ void ufs_read_inode (struct inode * inode) ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; + ufsi->i_dir_start_lookup = 0; if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++) ufsi->i_u1.i_data[i] = ufs_inode->ui_u2.ui_addr.ui_db[i]; - } - else { + } else { for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i]; } ufsi->i_osync = 0; - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ufs_file_inode_operations; - inode->i_fop = &ufs_file_operations; - inode->i_mapping->a_ops = &ufs_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ufs_dir_inode_operations; - inode->i_fop = &ufs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - if (!inode->i_blocks) - inode->i_op = &ufs_fast_symlink_inode_operations; - else { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ufs_aops; - } - } else - init_special_inode(inode, inode->i_mode, - ufs_get_inode_dev(sb, ufsi)); + ufs_set_inode_ops(inode); brelse (bh); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return; bad_inode: @@ -642,7 +691,7 @@ bad_inode: return; ufs2_inode : - UFSD(("Reading ufs2 inode, ino %lu\n", inode->i_ino)) + UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino); ufs2_inode = (struct ufs2_inode *)(bh->b_data + sizeof(struct ufs2_inode) * ufs_inotofsbo(inode->i_ino)); @@ -690,27 +739,11 @@ ufs2_inode : } ufsi->i_osync = 0; - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ufs_file_inode_operations; - inode->i_fop = &ufs_file_operations; - inode->i_mapping->a_ops = &ufs_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ufs_dir_inode_operations; - inode->i_fop = &ufs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - if (!inode->i_blocks) - inode->i_op = &ufs_fast_symlink_inode_operations; - else { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ufs_aops; - } - } else /* TODO : here ...*/ - init_special_inode(inode, inode->i_mode, - ufs_get_inode_dev(sb, ufsi)); + ufs_set_inode_ops(inode); brelse(bh); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return; } @@ -724,7 +757,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync) unsigned i; unsigned flags; - UFSD(("ENTER, ino %lu\n", inode->i_ino)) + UFSD("ENTER, ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -785,7 +818,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync) sync_dirty_buffer(bh); brelse (bh); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return 0; } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 8d5f98a01c74..abd5f23a426d 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -1,6 +1,9 @@ /* * linux/fs/ufs/namei.c * + * Migration to usage of "page cache" on May 2006 by + * Evgeniy Dushistov <dushistov@mail.ru> based on ext2 code base. + * * Copyright (C) 1998 * Daniel Pirkl <daniel.pirkl@email.cz> * Charles University, Faculty of Mathematics and Physics @@ -28,21 +31,9 @@ #include <linux/fs.h> #include <linux/ufs_fs.h> #include <linux/smp_lock.h> -#include <linux/buffer_head.h> #include "swab.h" /* will go away - see comment in mknod() */ #include "util.h" -/* -#undef UFS_NAMEI_DEBUG -*/ -#define UFS_NAMEI_DEBUG - -#ifdef UFS_NAMEI_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ufs_add_link(dentry, inode); @@ -88,8 +79,13 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { - struct inode * inode = ufs_new_inode(dir, mode); - int err = PTR_ERR(inode); + struct inode *inode; + int err; + + UFSD("BEGIN\n"); + inode = ufs_new_inode(dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { inode->i_op = &ufs_file_inode_operations; inode->i_fop = &ufs_file_operations; @@ -99,6 +95,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, err = ufs_add_nondir(dentry, inode); unlock_kernel(); } + UFSD("END: err=%d\n", err); return err; } @@ -205,6 +202,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; + inode->i_mapping->a_ops = &ufs_aops; inode_inc_link_count(inode); @@ -231,19 +229,18 @@ out_dir: goto out; } -static int ufs_unlink(struct inode * dir, struct dentry *dentry) +static int ufs_unlink(struct inode *dir, struct dentry *dentry) { struct inode * inode = dentry->d_inode; - struct buffer_head * bh; - struct ufs_dir_entry * de; + struct ufs_dir_entry *de; + struct page *page; int err = -ENOENT; - lock_kernel(); - de = ufs_find_entry (dentry, &bh); + de = ufs_find_entry(dir, dentry, &page); if (!de) goto out; - err = ufs_delete_entry (dir, de, bh); + err = ufs_delete_entry(dir, de, page); if (err) goto out; @@ -251,7 +248,6 @@ static int ufs_unlink(struct inode * dir, struct dentry *dentry) inode_dec_link_count(inode); err = 0; out: - unlock_kernel(); return err; } @@ -273,42 +269,42 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) return err; } -static int ufs_rename (struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry ) +static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; - struct buffer_head *dir_bh = NULL; - struct ufs_dir_entry *dir_de = NULL; - struct buffer_head *old_bh; + struct page *dir_page = NULL; + struct ufs_dir_entry * dir_de = NULL; + struct page *old_page; struct ufs_dir_entry *old_de; int err = -ENOENT; - lock_kernel(); - old_de = ufs_find_entry (old_dentry, &old_bh); + old_de = ufs_find_entry(old_dir, old_dentry, &old_page); if (!old_de) goto out; if (S_ISDIR(old_inode->i_mode)) { err = -EIO; - dir_de = ufs_dotdot(old_inode, &dir_bh); + dir_de = ufs_dotdot(old_inode, &dir_page); if (!dir_de) goto out_old; } if (new_inode) { - struct buffer_head *new_bh; + struct page *new_page; struct ufs_dir_entry *new_de; err = -ENOTEMPTY; - if (dir_de && !ufs_empty_dir (new_inode)) + if (dir_de && !ufs_empty_dir(new_inode)) goto out_dir; + err = -ENOENT; - new_de = ufs_find_entry (new_dentry, &new_bh); + new_de = ufs_find_entry(new_dir, new_dentry, &new_page); if (!new_de) goto out_dir; inode_inc_link_count(old_inode); - ufs_set_link(new_dir, new_de, new_bh, old_inode); + ufs_set_link(new_dir, new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) new_inode->i_nlink--; @@ -329,24 +325,32 @@ static int ufs_rename (struct inode * old_dir, struct dentry * old_dentry, inode_inc_link_count(new_dir); } - ufs_delete_entry (old_dir, old_de, old_bh); + /* + * Like most other Unix systems, set the ctime for inodes on a + * rename. + * inode_dec_link_count() will mark the inode dirty. + */ + old_inode->i_ctime = CURRENT_TIME_SEC; + ufs_delete_entry(old_dir, old_de, old_page); inode_dec_link_count(old_inode); if (dir_de) { - ufs_set_link(old_inode, dir_de, dir_bh, new_dir); + ufs_set_link(old_inode, dir_de, dir_page, new_dir); inode_dec_link_count(old_dir); } - unlock_kernel(); return 0; + out_dir: - if (dir_de) - brelse(dir_bh); + if (dir_de) { + kunmap(dir_page); + page_cache_release(dir_page); + } out_old: - brelse (old_bh); + kunmap(old_page); + page_cache_release(old_page); out: - unlock_kernel(); return err; } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index db98a4c71e63..74ef5e9bedff 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -90,95 +90,84 @@ #include "swab.h" #include "util.h" -#undef UFS_SUPER_DEBUG -#undef UFS_SUPER_DEBUG_MORE - - -#undef UFS_SUPER_DEBUG_MORE -#ifdef UFS_SUPER_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - -#ifdef UFS_SUPER_DEBUG_MORE +#ifdef CONFIG_UFS_DEBUG /* * Print contents of ufs_super_block, useful for debugging */ -void ufs_print_super_stuff(struct super_block *sb, - struct ufs_super_block_first * usb1, - struct ufs_super_block_second * usb2, - struct ufs_super_block_third * usb3) +static void ufs_print_super_stuff(struct super_block *sb, unsigned flags, + struct ufs_super_block_first *usb1, + struct ufs_super_block_second *usb2, + struct ufs_super_block_third *usb3) { printk("ufs_print_super_stuff\n"); - printk("size of usb: %u\n", sizeof(struct ufs_super_block)); - printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb3->fs_magic)); - printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); - printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); - printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno)); - printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno)); - printk(" cgoffset: %u\n", fs32_to_cpu(sb, usb1->fs_cgoffset)); - printk(" ~cgmask: 0x%x\n", ~fs32_to_cpu(sb, usb1->fs_cgmask)); - printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size)); - printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize)); - printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg)); - printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize)); - printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize)); - printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag)); - printk(" fragshift: %u\n", fs32_to_cpu(sb, usb1->fs_fragshift)); - printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask)); - printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift)); - printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize)); - printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc)); - printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg)); - printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg)); - printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg)); - printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr)); - printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize)); - printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize)); - printk(" fstodb: %u\n", fs32_to_cpu(sb, usb1->fs_fsbtodb)); - printk(" contigsumsize: %d\n", fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_contigsumsize)); - printk(" postblformat: %u\n", fs32_to_cpu(sb, usb3->fs_postblformat)); - printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos)); - printk(" ndir %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir)); - printk(" nifree %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree)); - printk(" nbfree %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)); - printk(" nffree %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree)); - printk("\n"); -} - -/* - * Print contents of ufs2 ufs_super_block, useful for debugging - */ -void ufs2_print_super_stuff( - struct super_block *sb, - struct ufs_super_block *usb) -{ - printk("ufs_print_super_stuff\n"); - printk("size of usb: %u\n", sizeof(struct ufs_super_block)); - printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb->fs_magic)); - printk(" fs_size: %u\n",fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_size)); - printk(" fs_dsize: %u\n",fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_dsize)); - printk(" bsize: %u\n", fs32_to_cpu(usb, usb->fs_bsize)); - printk(" fsize: %u\n", fs32_to_cpu(usb, usb->fs_fsize)); - printk(" fs_volname: %s\n", usb->fs_u11.fs_u2.fs_volname); - printk(" fs_fsmnt: %s\n", usb->fs_u11.fs_u2.fs_fsmnt); - printk(" fs_sblockloc: %u\n",fs64_to_cpu(sb, - usb->fs_u11.fs_u2.fs_sblockloc)); - printk(" cs_ndir(No of dirs): %u\n",fs64_to_cpu(sb, - usb->fs_u11.fs_u2.fs_cstotal.cs_ndir)); - printk(" cs_nbfree(No of free blocks): %u\n",fs64_to_cpu(sb, - usb->fs_u11.fs_u2.fs_cstotal.cs_nbfree)); + printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb3->fs_magic)); + if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { + printk(" fs_size: %llu\n", (unsigned long long) + fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size)); + printk(" fs_dsize: %llu\n", (unsigned long long) + fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize)); + printk(" bsize: %u\n", + fs32_to_cpu(sb, usb1->fs_bsize)); + printk(" fsize: %u\n", + fs32_to_cpu(sb, usb1->fs_fsize)); + printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname); + printk(" fs_sblockloc: %llu\n", (unsigned long long) + fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc)); + printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long) + fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir)); + printk(" cs_nbfree(No of free blocks): %llu\n", + (unsigned long long) + fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree)); + } else { + printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); + printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); + printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno)); + printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno)); + printk(" cgoffset: %u\n", + fs32_to_cpu(sb, usb1->fs_cgoffset)); + printk(" ~cgmask: 0x%x\n", + ~fs32_to_cpu(sb, usb1->fs_cgmask)); + printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size)); + printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize)); + printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg)); + printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize)); + printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize)); + printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag)); + printk(" fragshift: %u\n", + fs32_to_cpu(sb, usb1->fs_fragshift)); + printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask)); + printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift)); + printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize)); + printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc)); + printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg)); + printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg)); + printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg)); + printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr)); + printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize)); + printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize)); + printk(" fstodb: %u\n", + fs32_to_cpu(sb, usb1->fs_fsbtodb)); + printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos)); + printk(" ndir %u\n", + fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir)); + printk(" nifree %u\n", + fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree)); + printk(" nbfree %u\n", + fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)); + printk(" nffree %u\n", + fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree)); + } printk("\n"); } /* * Print contents of ufs_cylinder_group, useful for debugging */ -void ufs_print_cylinder_stuff(struct super_block *sb, struct ufs_cylinder_group *cg) +static void ufs_print_cylinder_stuff(struct super_block *sb, + struct ufs_cylinder_group *cg) { printk("\nufs_print_cylinder_stuff\n"); - printk("size of ucg: %u\n", sizeof(struct ufs_cylinder_group)); + printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group)); printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic)); printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time)); printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx)); @@ -202,12 +191,18 @@ void ufs_print_cylinder_stuff(struct super_block *sb, struct ufs_cylinder_group printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff)); printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff)); printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff)); - printk(" clustersumoff %u\n", fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff)); - printk(" clusteroff %u\n", fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff)); - printk(" nclusterblks %u\n", fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks)); + printk(" clustersumoff %u\n", + fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff)); + printk(" clusteroff %u\n", + fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff)); + printk(" nclusterblks %u\n", + fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks)); printk("\n"); } -#endif /* UFS_SUPER_DEBUG_MORE */ +#else +# define ufs_print_super_stuff(sb, flags, usb1, usb2, usb3) /**/ +# define ufs_print_cylinder_stuff(sb, cg) /**/ +#endif /* CONFIG_UFS_DEBUG */ static struct super_operations ufs_super_ops; @@ -225,7 +220,7 @@ void ufs_error (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { usb1->fs_clean = UFS_FSBAD; - ubh_mark_buffer_dirty(USPI_UBH); + ubh_mark_buffer_dirty(USPI_UBH(uspi)); sb->s_dirt = 1; sb->s_flags |= MS_RDONLY; } @@ -257,7 +252,7 @@ void ufs_panic (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { usb1->fs_clean = UFS_FSBAD; - ubh_mark_buffer_dirty(USPI_UBH); + ubh_mark_buffer_dirty(USPI_UBH(uspi)); sb->s_dirt = 1; } va_start (args, fmt); @@ -309,7 +304,7 @@ static int ufs_parse_options (char * options, unsigned * mount_options) { char * p; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); if (!options) return 1; @@ -386,27 +381,57 @@ static int ufs_parse_options (char * options, unsigned * mount_options) } /* + * Diffrent types of UFS hold fs_cstotal in different + * places, and use diffrent data structure for it. + * To make things simplier we just copy fs_cstotal to ufs_sb_private_info + */ +static void ufs_setup_cstotal(struct super_block *sb) +{ + struct ufs_sb_info *sbi = UFS_SB(sb); + struct ufs_sb_private_info *uspi = sbi->s_uspi; + struct ufs_super_block_first *usb1; + struct ufs_super_block_second *usb2; + struct ufs_super_block_third *usb3; + unsigned mtype = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; + + UFSD("ENTER, mtype=%u\n", mtype); + usb1 = ubh_get_usb_first(uspi); + usb2 = ubh_get_usb_second(uspi); + usb3 = ubh_get_usb_third(uspi); + + if ((mtype == UFS_MOUNT_UFSTYPE_44BSD && + (usb1->fs_flags & UFS_FLAGS_UPDATED)) || + mtype == UFS_MOUNT_UFSTYPE_UFS2) { + /*we have statistic in different place, then usual*/ + uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir); + uspi->cs_total.cs_nbfree = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree); + uspi->cs_total.cs_nifree = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree); + uspi->cs_total.cs_nffree = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree); + } else { + uspi->cs_total.cs_ndir = fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir); + uspi->cs_total.cs_nbfree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree); + uspi->cs_total.cs_nifree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree); + uspi->cs_total.cs_nffree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree); + } + UFSD("EXIT\n"); +} + +/* * Read on-disk structures associated with cylinder groups */ -static int ufs_read_cylinder_structures (struct super_block *sb) +static int ufs_read_cylinder_structures(struct super_block *sb) { - struct ufs_sb_info * sbi = UFS_SB(sb); - struct ufs_sb_private_info * uspi; - struct ufs_super_block *usb; + struct ufs_sb_info *sbi = UFS_SB(sb); + struct ufs_sb_private_info *uspi = sbi->s_uspi; + unsigned flags = sbi->s_flags; struct ufs_buffer_head * ubh; unsigned char * base, * space; unsigned size, blks, i; - unsigned flags = 0; - - UFSD(("ENTER\n")) - - uspi = sbi->s_uspi; + struct ufs_super_block_third *usb3; - usb = (struct ufs_super_block *) - ((struct ufs_buffer_head *)uspi)->bh[0]->b_data; + UFSD("ENTER\n"); - flags = UFS_SB(sb)->s_flags; - + usb3 = ubh_get_usb_third(uspi); /* * Read cs structures from (usually) first data block * on the device. @@ -424,7 +449,7 @@ static int ufs_read_cylinder_structures (struct super_block *sb) if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) ubh = ubh_bread(sb, - fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_csaddr) + i, size); + fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_csaddr) + i, size); else ubh = ubh_bread(sb, uspi->s_csaddr + i, size); @@ -451,14 +476,13 @@ static int ufs_read_cylinder_structures (struct super_block *sb) sbi->s_cgno[i] = UFS_CGNO_EMPTY; } for (i = 0; i < uspi->s_ncg; i++) { - UFSD(("read cg %u\n", i)) + UFSD("read cg %u\n", i); if (!(sbi->s_ucg[i] = sb_bread(sb, ufs_cgcmin(i)))) goto failed; if (!ufs_cg_chkmagic (sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data)) goto failed; -#ifdef UFS_SUPER_DEBUG_MORE + ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data); -#endif } for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) { if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL))) @@ -466,7 +490,7 @@ static int ufs_read_cylinder_structures (struct super_block *sb) sbi->s_cgno[i] = UFS_CGNO_EMPTY; } sbi->s_cg_loaded = 0; - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return 1; failed: @@ -479,26 +503,69 @@ failed: for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) kfree (sbi->s_ucpi[i]); } - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return 0; } /* - * Put on-disk structures associated with cylinder groups and - * write them back to disk + * Sync our internal copy of fs_cstotal with disk */ -static void ufs_put_cylinder_structures (struct super_block *sb) +static void ufs_put_cstotal(struct super_block *sb) { - struct ufs_sb_info * sbi = UFS_SB(sb); - struct ufs_sb_private_info * uspi; + unsigned mtype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + struct ufs_super_block_first *usb1; + struct ufs_super_block_second *usb2; + struct ufs_super_block_third *usb3; + + UFSD("ENTER\n"); + usb1 = ubh_get_usb_first(uspi); + usb2 = ubh_get_usb_second(uspi); + usb3 = ubh_get_usb_third(uspi); + + if ((mtype == UFS_MOUNT_UFSTYPE_44BSD && + (usb1->fs_flags & UFS_FLAGS_UPDATED)) || + mtype == UFS_MOUNT_UFSTYPE_UFS2) { + /*we have statistic in different place, then usual*/ + usb2->fs_un.fs_u2.cs_ndir = + cpu_to_fs64(sb, uspi->cs_total.cs_ndir); + usb2->fs_un.fs_u2.cs_nbfree = + cpu_to_fs64(sb, uspi->cs_total.cs_nbfree); + usb3->fs_un1.fs_u2.cs_nifree = + cpu_to_fs64(sb, uspi->cs_total.cs_nifree); + usb3->fs_un1.fs_u2.cs_nffree = + cpu_to_fs64(sb, uspi->cs_total.cs_nffree); + } else { + usb1->fs_cstotal.cs_ndir = + cpu_to_fs32(sb, uspi->cs_total.cs_ndir); + usb1->fs_cstotal.cs_nbfree = + cpu_to_fs32(sb, uspi->cs_total.cs_nbfree); + usb1->fs_cstotal.cs_nifree = + cpu_to_fs32(sb, uspi->cs_total.cs_nifree); + usb1->fs_cstotal.cs_nffree = + cpu_to_fs32(sb, uspi->cs_total.cs_nffree); + } + ubh_mark_buffer_dirty(USPI_UBH(uspi)); + UFSD("EXIT\n"); +} + +/** + * ufs_put_super_internal() - put on-disk intrenal structures + * @sb: pointer to super_block structure + * Put on-disk structures associated with cylinder groups + * and write them back to disk, also update cs_total on disk + */ +static void ufs_put_super_internal(struct super_block *sb) +{ + struct ufs_sb_info *sbi = UFS_SB(sb); + struct ufs_sb_private_info *uspi = sbi->s_uspi; struct ufs_buffer_head * ubh; unsigned char * base, * space; unsigned blks, size, i; - - UFSD(("ENTER\n")) - - uspi = sbi->s_uspi; + + UFSD("ENTER\n"); + ufs_put_cstotal(sb); size = uspi->s_cssize; blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; base = space = (char*) sbi->s_csp; @@ -523,7 +590,7 @@ static void ufs_put_cylinder_structures (struct super_block *sb) brelse (sbi->s_ucg[i]); kfree (sbi->s_ucg); kfree (base); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); } static int ufs_fill_super(struct super_block *sb, void *data, int silent) @@ -533,7 +600,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) struct ufs_super_block_first * usb1; struct ufs_super_block_second * usb2; struct ufs_super_block_third * usb3; - struct ufs_super_block *usb; struct ufs_buffer_head * ubh; struct inode *inode; unsigned block_size, super_block_size; @@ -544,7 +610,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) ubh = NULL; flags = 0; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); sbi = kmalloc(sizeof(struct ufs_sb_info), GFP_KERNEL); if (!sbi) @@ -552,7 +618,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; memset(sbi, 0, sizeof(struct ufs_sb_info)); - UFSD(("flag %u\n", (int)(sb->s_flags & MS_RDONLY))) + UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); #ifndef CONFIG_UFS_FS_WRITE if (!(sb->s_flags & MS_RDONLY)) { @@ -593,7 +659,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) the rules */ switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { case UFS_MOUNT_UFSTYPE_44BSD: - UFSD(("ufstype=44bsd\n")) + UFSD("ufstype=44bsd\n"); uspi->s_fsize = block_size = 512; uspi->s_fmask = ~(512 - 1); uspi->s_fshift = 9; @@ -602,7 +668,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; break; case UFS_MOUNT_UFSTYPE_UFS2: - UFSD(("ufstype=ufs2\n")); + UFSD("ufstype=ufs2\n"); super_block_offset=SBLOCK_UFS2; uspi->s_fsize = block_size = 512; uspi->s_fmask = ~(512 - 1); @@ -617,7 +683,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_SUN: - UFSD(("ufstype=sun\n")) + UFSD("ufstype=sun\n"); uspi->s_fsize = block_size = 1024; uspi->s_fmask = ~(1024 - 1); uspi->s_fshift = 10; @@ -628,7 +694,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_SUNx86: - UFSD(("ufstype=sunx86\n")) + UFSD("ufstype=sunx86\n"); uspi->s_fsize = block_size = 1024; uspi->s_fmask = ~(1024 - 1); uspi->s_fshift = 10; @@ -639,7 +705,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_OLD: - UFSD(("ufstype=old\n")) + UFSD("ufstype=old\n"); uspi->s_fsize = block_size = 1024; uspi->s_fmask = ~(1024 - 1); uspi->s_fshift = 10; @@ -654,7 +720,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_NEXTSTEP: - UFSD(("ufstype=nextstep\n")) + UFSD("ufstype=nextstep\n"); uspi->s_fsize = block_size = 1024; uspi->s_fmask = ~(1024 - 1); uspi->s_fshift = 10; @@ -669,7 +735,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_NEXTSTEP_CD: - UFSD(("ufstype=nextstep-cd\n")) + UFSD("ufstype=nextstep-cd\n"); uspi->s_fsize = block_size = 2048; uspi->s_fmask = ~(2048 - 1); uspi->s_fshift = 11; @@ -684,7 +750,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_OPENSTEP: - UFSD(("ufstype=openstep\n")) + UFSD("ufstype=openstep\n"); uspi->s_fsize = block_size = 1024; uspi->s_fmask = ~(1024 - 1); uspi->s_fshift = 10; @@ -699,7 +765,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_MOUNT_UFSTYPE_HP: - UFSD(("ufstype=hp\n")) + UFSD("ufstype=hp\n"); uspi->s_fsize = block_size = 1024; uspi->s_fmask = ~(1024 - 1); uspi->s_fshift = 10; @@ -737,8 +803,6 @@ again: usb1 = ubh_get_usb_first(uspi); usb2 = ubh_get_usb_second(uspi); usb3 = ubh_get_usb_third(uspi); - usb = (struct ufs_super_block *) - ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ; /* * Check ufs magic number @@ -820,16 +884,12 @@ magic_found: ubh = NULL; block_size = uspi->s_fsize; super_block_size = uspi->s_sbsize; - UFSD(("another value of block_size or super_block_size %u, %u\n", block_size, super_block_size)) + UFSD("another value of block_size or super_block_size %u, %u\n", block_size, super_block_size); goto again; } -#ifdef UFS_SUPER_DEBUG_MORE - if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) - ufs2_print_super_stuff(sb,usb); - else - ufs_print_super_stuff(sb, usb1, usb2, usb3); -#endif + + ufs_print_super_stuff(sb, flags, usb1, usb2, usb3); /* * Check, if file system was correctly unmounted. @@ -842,13 +902,13 @@ magic_found: (ufs_get_fs_state(sb, usb1, usb3) == (UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time))))) { switch(usb1->fs_clean) { case UFS_FSCLEAN: - UFSD(("fs is clean\n")) + UFSD("fs is clean\n"); break; case UFS_FSSTABLE: - UFSD(("fs is stable\n")) + UFSD("fs is stable\n"); break; case UFS_FSOSF1: - UFSD(("fs is DEC OSF/1\n")) + UFSD("fs is DEC OSF/1\n"); break; case UFS_FSACTIVE: printk("ufs_read_super: fs is active\n"); @@ -863,8 +923,7 @@ magic_found: sb->s_flags |= MS_RDONLY; break; } - } - else { + } else { printk("ufs_read_super: fs needs fsck\n"); sb->s_flags |= MS_RDONLY; } @@ -884,10 +943,9 @@ magic_found: uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { - uspi->s_u2_size = fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_size); - uspi->s_u2_dsize = fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_dsize); - } - else { + uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size); + uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); + } else { uspi->s_size = fs32_to_cpu(sb, usb1->fs_size); uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize); } @@ -901,8 +959,8 @@ magic_found: uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); uspi->s_bshift = fs32_to_cpu(sb, usb1->fs_bshift); uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); - UFSD(("uspi->s_bshift = %d,uspi->s_fshift = %d", uspi->s_bshift, - uspi->s_fshift)); + UFSD("uspi->s_bshift = %d,uspi->s_fshift = %d", uspi->s_bshift, + uspi->s_fshift); uspi->s_fpbshift = fs32_to_cpu(sb, usb1->fs_fragshift); uspi->s_fsbtodb = fs32_to_cpu(sb, usb1->fs_fsbtodb); /* s_sbsize already set */ @@ -922,8 +980,8 @@ magic_found: uspi->s_spc = fs32_to_cpu(sb, usb1->fs_spc); uspi->s_ipg = fs32_to_cpu(sb, usb1->fs_ipg); uspi->s_fpg = fs32_to_cpu(sb, usb1->fs_fpg); - uspi->s_cpc = fs32_to_cpu(sb, usb2->fs_cpc); - uspi->s_contigsumsize = fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_contigsumsize); + uspi->s_cpc = fs32_to_cpu(sb, usb2->fs_un.fs_u1.fs_cpc); + uspi->s_contigsumsize = fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_contigsumsize); uspi->s_qbmask = ufs_get_fs_qbmask(sb, usb3); uspi->s_qfmask = ufs_get_fs_qfmask(sb, usb3); uspi->s_postblformat = fs32_to_cpu(sb, usb3->fs_postblformat); @@ -935,12 +993,11 @@ magic_found: * Compute another frequently used values */ uspi->s_fpbmask = uspi->s_fpb - 1; - if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { + if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) uspi->s_apbshift = uspi->s_bshift - 3; - } - else { + else uspi->s_apbshift = uspi->s_bshift - 2; - } + uspi->s_2apbshift = uspi->s_apbshift * 2; uspi->s_3apbshift = uspi->s_apbshift * 3; uspi->s_apb = 1 << uspi->s_apbshift; @@ -956,7 +1013,7 @@ magic_found: if ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_44BSD) uspi->s_maxsymlinklen = - fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_maxsymlinklen); + fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen); sbi->s_flags = flags; @@ -967,7 +1024,7 @@ magic_found: if (!sb->s_root) goto dalloc_failed; - + ufs_setup_cstotal(sb); /* * Read cylinder group structures */ @@ -975,7 +1032,7 @@ magic_found: if (!ufs_read_cylinder_structures(sb)) goto failed; - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return 0; dalloc_failed: @@ -986,15 +1043,16 @@ failed: kfree (uspi); kfree(sbi); sb->s_fs_info = NULL; - UFSD(("EXIT (FAILED)\n")) + UFSD("EXIT (FAILED)\n"); return -EINVAL; failed_nomem: - UFSD(("EXIT (NOMEM)\n")) + UFSD("EXIT (NOMEM)\n"); return -ENOMEM; } -static void ufs_write_super (struct super_block *sb) { +static void ufs_write_super(struct super_block *sb) +{ struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; struct ufs_super_block_third * usb3; @@ -1002,7 +1060,7 @@ static void ufs_write_super (struct super_block *sb) { lock_kernel(); - UFSD(("ENTER\n")) + UFSD("ENTER\n"); flags = UFS_SB(sb)->s_flags; uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); @@ -1014,26 +1072,27 @@ static void ufs_write_super (struct super_block *sb) { || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); - ubh_mark_buffer_dirty (USPI_UBH); + ufs_put_cstotal(sb); } sb->s_dirt = 0; - UFSD(("EXIT\n")) + UFSD("EXIT\n"); unlock_kernel(); } -static void ufs_put_super (struct super_block *sb) +static void ufs_put_super(struct super_block *sb) { struct ufs_sb_info * sbi = UFS_SB(sb); - UFSD(("ENTER\n")) + UFSD("ENTER\n"); if (!(sb->s_flags & MS_RDONLY)) - ufs_put_cylinder_structures (sb); + ufs_put_super_internal(sb); ubh_brelse_uspi (sbi->s_uspi); kfree (sbi->s_uspi); kfree (sbi); sb->s_fs_info = NULL; + UFSD("EXIT\n"); return; } @@ -1062,8 +1121,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) return -EINVAL; if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { new_mount_opt |= ufstype; - } - else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { + } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { printk("ufstype can't be changed during remount\n"); return -EINVAL; } @@ -1077,20 +1135,19 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) * fs was mouted as rw, remounting ro */ if (*mount_flags & MS_RDONLY) { - ufs_put_cylinder_structures(sb); + ufs_put_super_internal(sb); usb1->fs_time = cpu_to_fs32(sb, get_seconds()); if ((flags & UFS_ST_MASK) == UFS_ST_SUN || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); - ubh_mark_buffer_dirty (USPI_UBH); + ubh_mark_buffer_dirty (USPI_UBH(uspi)); sb->s_dirt = 0; sb->s_flags |= MS_RDONLY; - } + } else { /* * fs was mounted as ro, remounting rw */ - else { #ifndef CONFIG_UFS_FS_WRITE printk("ufs was compiled with read-only support, " "can't be mounted as read-write\n"); @@ -1102,7 +1159,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) printk("this ufstype is read-only supported\n"); return -EINVAL; } - if (!ufs_read_cylinder_structures (sb)) { + if (!ufs_read_cylinder_structures(sb)) { printk("failed during remounting\n"); return -EPERM; } @@ -1113,36 +1170,31 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) return 0; } -static int ufs_statfs (struct super_block *sb, struct kstatfs *buf) +static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct ufs_sb_private_info * uspi; - struct ufs_super_block_first * usb1; - struct ufs_super_block * usb; - unsigned flags = 0; + struct super_block *sb = dentry->d_sb; + struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; + unsigned flags = UFS_SB(sb)->s_flags; + struct ufs_super_block_first *usb1; + struct ufs_super_block_second *usb2; + struct ufs_super_block_third *usb3; lock_kernel(); - uspi = UFS_SB(sb)->s_uspi; - usb1 = ubh_get_usb_first (uspi); - usb = (struct ufs_super_block *) - ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ; + usb1 = ubh_get_usb_first(uspi); + usb2 = ubh_get_usb_second(uspi); + usb3 = ubh_get_usb_third(uspi); - flags = UFS_SB(sb)->s_flags; if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { buf->f_type = UFS2_MAGIC; - buf->f_blocks = fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_dsize); - buf->f_bfree = ufs_blkstofrags(fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_cstotal.cs_nbfree)) + - fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_cstotal.cs_nffree); - buf->f_ffree = fs64_to_cpu(sb, - usb->fs_u11.fs_u2.fs_cstotal.cs_nifree); - } - else { + buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); + } else { buf->f_type = UFS_MAGIC; buf->f_blocks = uspi->s_dsize; - buf->f_bfree = ufs_blkstofrags(fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)) + - fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree); - buf->f_ffree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree); } + buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) + + uspi->cs_total.cs_nffree; + buf->f_ffree = uspi->cs_total.cs_nifree; buf->f_bsize = sb->s_blocksize; buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree)) ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0; @@ -1311,10 +1363,10 @@ out: #endif -static struct super_block *ufs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ufs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); } static struct file_system_type ufs_fs_type = { diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 02e86291ef8a..3c3b301f8701 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -49,14 +49,6 @@ #include "swab.h" #include "util.h" -#undef UFS_TRUNCATE_DEBUG - -#ifdef UFS_TRUNCATE_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - /* * Secure deletion currently doesn't work. It interacts very badly * with buffers shared with memory mappings, and for that reason @@ -82,7 +74,7 @@ static int ufs_trunc_direct (struct inode * inode) unsigned i, tmp; int retry; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -105,7 +97,7 @@ static int ufs_trunc_direct (struct inode * inode) block2 = ufs_fragstoblks (frag3); } - UFSD(("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4)) + UFSD("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4); if (frag1 >= frag2) goto next1; @@ -120,9 +112,8 @@ static int ufs_trunc_direct (struct inode * inode) frag1 = ufs_fragnum (frag1); frag2 = ufs_fragnum (frag2); - inode->i_blocks -= (frag2-frag1) << uspi->s_nspfshift; - mark_inode_dirty(inode); ufs_free_fragments (inode, tmp + frag1, frag2 - frag1); + mark_inode_dirty(inode); frag_to_free = tmp + frag1; next1: @@ -136,8 +127,7 @@ next1: continue; *p = 0; - inode->i_blocks -= uspi->s_nspb; - mark_inode_dirty(inode); + if (free_count == 0) { frag_to_free = tmp; free_count = uspi->s_fpb; @@ -148,6 +138,7 @@ next1: frag_to_free = tmp; free_count = uspi->s_fpb; } + mark_inode_dirty(inode); } if (free_count > 0) @@ -166,12 +157,12 @@ next1: frag4 = ufs_fragnum (frag4); *p = 0; - inode->i_blocks -= frag4 << uspi->s_nspfshift; - mark_inode_dirty(inode); + ufs_free_fragments (inode, tmp, frag4); + mark_inode_dirty(inode); next3: - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return retry; } @@ -186,7 +177,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p) unsigned frag_to_free, free_count; int retry; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -227,7 +218,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p) frag_to_free = tmp; free_count = uspi->s_fpb; } - inode->i_blocks -= uspi->s_nspb; + mark_inode_dirty(inode); } @@ -238,26 +229,21 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p) if (*ubh_get_addr32(ind_ubh,i)) break; if (i >= uspi->s_apb) { - if (ubh_max_bcount(ind_ubh) != 1) { - retry = 1; - } - else { - tmp = fs32_to_cpu(sb, *p); - *p = 0; - inode->i_blocks -= uspi->s_nspb; - mark_inode_dirty(inode); - ufs_free_blocks (inode, tmp, uspi->s_fpb); - ubh_bforget(ind_ubh); - ind_ubh = NULL; - } + tmp = fs32_to_cpu(sb, *p); + *p = 0; + + ufs_free_blocks (inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + ubh_bforget(ind_ubh); + ind_ubh = NULL; } if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block (SWRITE, 1, &ind_ubh); + ubh_ll_rw_block(SWRITE, ind_ubh); ubh_wait_on_buffer (ind_ubh); } ubh_brelse (ind_ubh); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return retry; } @@ -271,7 +257,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p) __fs32 * dind; int retry = 0; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -306,25 +292,21 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p) if (*ubh_get_addr32 (dind_bh, i)) break; if (i >= uspi->s_apb) { - if (ubh_max_bcount(dind_bh) != 1) - retry = 1; - else { - tmp = fs32_to_cpu(sb, *p); - *p = 0; - inode->i_blocks -= uspi->s_nspb; - mark_inode_dirty(inode); - ufs_free_blocks (inode, tmp, uspi->s_fpb); - ubh_bforget(dind_bh); - dind_bh = NULL; - } + tmp = fs32_to_cpu(sb, *p); + *p = 0; + + ufs_free_blocks(inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + ubh_bforget(dind_bh); + dind_bh = NULL; } if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block (SWRITE, 1, &dind_bh); + ubh_ll_rw_block(SWRITE, dind_bh); ubh_wait_on_buffer (dind_bh); } ubh_brelse (dind_bh); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return retry; } @@ -339,7 +321,7 @@ static int ufs_trunc_tindirect (struct inode * inode) __fs32 * tind, * p; int retry; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -370,25 +352,21 @@ static int ufs_trunc_tindirect (struct inode * inode) if (*ubh_get_addr32 (tind_bh, i)) break; if (i >= uspi->s_apb) { - if (ubh_max_bcount(tind_bh) != 1) - retry = 1; - else { - tmp = fs32_to_cpu(sb, *p); - *p = 0; - inode->i_blocks -= uspi->s_nspb; - mark_inode_dirty(inode); - ufs_free_blocks (inode, tmp, uspi->s_fpb); - ubh_bforget(tind_bh); - tind_bh = NULL; - } + tmp = fs32_to_cpu(sb, *p); + *p = 0; + + ufs_free_blocks(inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + ubh_bforget(tind_bh); + tind_bh = NULL; } if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block (SWRITE, 1, &tind_bh); + ubh_ll_rw_block(SWRITE, tind_bh); ubh_wait_on_buffer (tind_bh); } ubh_brelse (tind_bh); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); return retry; } @@ -399,7 +377,7 @@ void ufs_truncate (struct inode * inode) struct ufs_sb_private_info * uspi; int retry; - UFSD(("ENTER\n")) + UFSD("ENTER\n"); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -430,5 +408,5 @@ void ufs_truncate (struct inode * inode) ufsi->i_lastfrag = DIRECT_FRAGMENT; unlock_kernel(); mark_inode_dirty(inode); - UFSD(("EXIT\n")) + UFSD("EXIT\n"); } diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 59acc8f073ac..a2f13f45708b 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -14,15 +14,6 @@ #include "swab.h" #include "util.h" -#undef UFS_UTILS_DEBUG - -#ifdef UFS_UTILS_DEBUG -#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; -#else -#define UFSD(x) -#endif - - struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi, struct super_block *sb, u64 fragment, u64 size) { @@ -63,17 +54,17 @@ struct ufs_buffer_head * ubh_bread_uspi (struct ufs_sb_private_info * uspi, count = size >> uspi->s_fshift; if (count <= 0 || count > UFS_MAXFRAG) return NULL; - USPI_UBH->fragment = fragment; - USPI_UBH->count = count; + USPI_UBH(uspi)->fragment = fragment; + USPI_UBH(uspi)->count = count; for (i = 0; i < count; i++) - if (!(USPI_UBH->bh[i] = sb_bread(sb, fragment + i))) + if (!(USPI_UBH(uspi)->bh[i] = sb_bread(sb, fragment + i))) goto failed; for (; i < UFS_MAXFRAG; i++) - USPI_UBH->bh[i] = NULL; - return USPI_UBH; + USPI_UBH(uspi)->bh[i] = NULL; + return USPI_UBH(uspi); failed: for (j = 0; j < i; j++) - brelse (USPI_UBH->bh[j]); + brelse (USPI_UBH(uspi)->bh[j]); return NULL; } @@ -90,11 +81,11 @@ void ubh_brelse (struct ufs_buffer_head * ubh) void ubh_brelse_uspi (struct ufs_sb_private_info * uspi) { unsigned i; - if (!USPI_UBH) + if (!USPI_UBH(uspi)) return; - for ( i = 0; i < USPI_UBH->count; i++ ) { - brelse (USPI_UBH->bh[i]); - USPI_UBH->bh[i] = NULL; + for ( i = 0; i < USPI_UBH(uspi)->count; i++ ) { + brelse (USPI_UBH(uspi)->bh[i]); + USPI_UBH(uspi)->bh[i] = NULL; } } @@ -121,13 +112,12 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block (int rw, unsigned nr, struct ufs_buffer_head * ubh[]) +void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) { - unsigned i; if (!ubh) return; - for ( i = 0; i < nr; i++ ) - ll_rw_block (rw, ubh[i]->count, ubh[i]->bh); + + ll_rw_block(rw, ubh->count, ubh->bh); } void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) @@ -139,18 +129,6 @@ void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) wait_on_buffer (ubh->bh[i]); } -unsigned ubh_max_bcount (struct ufs_buffer_head * ubh) -{ - unsigned i; - unsigned max = 0; - if (!ubh) - return 0; - for ( i = 0; i < ubh->count; i++ ) - if ( atomic_read(&ubh->bh[i]->b_count) > max ) - max = atomic_read(&ubh->bh[i]->b_count); - return max; -} - void ubh_bforget (struct ufs_buffer_head * ubh) { unsigned i; diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 48d6d9bcc157..406981fff5e7 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -17,10 +17,16 @@ #define in_range(b,first,len) ((b)>=(first)&&(b)<(first)+(len)) /* - * macros used for retyping + * functions used for retyping */ -#define UCPI_UBH ((struct ufs_buffer_head *)ucpi) -#define USPI_UBH ((struct ufs_buffer_head *)uspi) +static inline struct ufs_buffer_head *UCPI_UBH(struct ufs_cg_private_info *cpi) +{ + return &cpi->c_ubh; +} +static inline struct ufs_buffer_head *USPI_UBH(struct ufs_sb_private_info *spi) +{ + return &spi->s_ubh; +} @@ -33,12 +39,12 @@ ufs_get_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1, { switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUN: - return fs32_to_cpu(sb, usb3->fs_u2.fs_sun.fs_state); + return fs32_to_cpu(sb, usb3->fs_un2.fs_sun.fs_state); case UFS_ST_SUNx86: return fs32_to_cpu(sb, usb1->fs_u1.fs_sunx86.fs_state); case UFS_ST_44BSD: default: - return fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_state); + return fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_state); } } @@ -48,13 +54,13 @@ ufs_set_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1, { switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUN: - usb3->fs_u2.fs_sun.fs_state = cpu_to_fs32(sb, value); + usb3->fs_un2.fs_sun.fs_state = cpu_to_fs32(sb, value); break; case UFS_ST_SUNx86: usb1->fs_u1.fs_sunx86.fs_state = cpu_to_fs32(sb, value); break; case UFS_ST_44BSD: - usb3->fs_u2.fs_44.fs_state = cpu_to_fs32(sb, value); + usb3->fs_un2.fs_44.fs_state = cpu_to_fs32(sb, value); break; } } @@ -64,7 +70,7 @@ ufs_get_fs_npsect(struct super_block *sb, struct ufs_super_block_first *usb1, struct ufs_super_block_third *usb3) { if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86) - return fs32_to_cpu(sb, usb3->fs_u2.fs_sunx86.fs_npsect); + return fs32_to_cpu(sb, usb3->fs_un2.fs_sunx86.fs_npsect); else return fs32_to_cpu(sb, usb1->fs_u1.fs_sun.fs_npsect); } @@ -76,16 +82,16 @@ ufs_get_fs_qbmask(struct super_block *sb, struct ufs_super_block_third *usb3) switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUN: - ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sun.fs_qbmask[0]; - ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sun.fs_qbmask[1]; + ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sun.fs_qbmask[0]; + ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sun.fs_qbmask[1]; break; case UFS_ST_SUNx86: - ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sunx86.fs_qbmask[0]; - ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sunx86.fs_qbmask[1]; + ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sunx86.fs_qbmask[0]; + ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sunx86.fs_qbmask[1]; break; case UFS_ST_44BSD: - ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_44.fs_qbmask[0]; - ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_44.fs_qbmask[1]; + ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_44.fs_qbmask[0]; + ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_44.fs_qbmask[1]; break; } @@ -99,16 +105,16 @@ ufs_get_fs_qfmask(struct super_block *sb, struct ufs_super_block_third *usb3) switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUN: - ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sun.fs_qfmask[0]; - ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sun.fs_qfmask[1]; + ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sun.fs_qfmask[0]; + ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sun.fs_qfmask[1]; break; case UFS_ST_SUNx86: - ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sunx86.fs_qfmask[0]; - ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sunx86.fs_qfmask[1]; + ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sunx86.fs_qfmask[0]; + ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sunx86.fs_qfmask[1]; break; case UFS_ST_44BSD: - ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_44.fs_qfmask[0]; - ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_44.fs_qfmask[1]; + ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_44.fs_qfmask[0]; + ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_44.fs_qfmask[1]; break; } @@ -236,9 +242,8 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block (int, unsigned, struct ufs_buffer_head **); +extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); extern void ubh_wait_on_buffer (struct ufs_buffer_head *); -extern unsigned ubh_max_bcount (struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) @@ -297,40 +302,26 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi, #define ubh_blkmap(ubh,begin,bit) \ ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb))) - -/* - * Macros for access to superblock array structures - */ -#define ubh_postbl(ubh,cylno,i) \ - ((uspi->s_postblformat != UFS_DYNAMICPOSTBLFMT) \ - ? (*(__s16*)(ubh_get_addr(ubh, \ - (unsigned)(&((struct ufs_super_block *)0)->fs_opostbl) \ - + (((cylno) * 16 + (i)) << 1) ) )) \ - : (*(__s16*)(ubh_get_addr(ubh, \ - uspi->s_postbloff + (((cylno) * uspi->s_nrpos + (i)) << 1) )))) - -#define ubh_rotbl(ubh,i) \ - ((uspi->s_postblformat != UFS_DYNAMICPOSTBLFMT) \ - ? (*(__u8*)(ubh_get_addr(ubh, \ - (unsigned)(&((struct ufs_super_block *)0)->fs_space) + (i)))) \ - : (*(__u8*)(ubh_get_addr(ubh, uspi->s_rotbloff + (i))))) - /* * Determine the number of available frags given a * percentage to hold in reserve. */ -#define ufs_freespace(usb, percentreserved) \ - (ufs_blkstofrags(fs32_to_cpu(sb, (usb)->fs_cstotal.cs_nbfree)) + \ - fs32_to_cpu(sb, (usb)->fs_cstotal.cs_nffree) - (uspi->s_dsize * (percentreserved) / 100)) +static inline u64 +ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved) +{ + return ufs_blkstofrags(uspi->cs_total.cs_nbfree) + + uspi->cs_total.cs_nffree - + (uspi->s_dsize * (percentreserved) / 100); +} /* * Macros to access cylinder group array structures */ #define ubh_cg_blktot(ucpi,cylno) \ - (*((__fs32*)ubh_get_addr(UCPI_UBH, (ucpi)->c_btotoff + ((cylno) << 2)))) + (*((__fs32*)ubh_get_addr(UCPI_UBH(ucpi), (ucpi)->c_btotoff + ((cylno) << 2)))) #define ubh_cg_blks(ucpi,cylno,rpos) \ - (*((__fs16*)ubh_get_addr(UCPI_UBH, \ + (*((__fs16*)ubh_get_addr(UCPI_UBH(ucpi), \ (ucpi)->c_boff + (((cylno) * uspi->s_nrpos + (rpos)) << 1 )))) /* @@ -508,29 +499,3 @@ static inline void ufs_fragacct (struct super_block * sb, unsigned blockmap, if (fragsize > 0 && fragsize < uspi->s_fpb) fs32_add(sb, &fraglist[fragsize], cnt); } - -#define ubh_scanc(ubh,begin,size,table,mask) _ubh_scanc_(uspi,ubh,begin,size,table,mask) -static inline unsigned _ubh_scanc_(struct ufs_sb_private_info * uspi, struct ufs_buffer_head * ubh, - unsigned begin, unsigned size, unsigned char * table, unsigned char mask) -{ - unsigned rest, offset; - unsigned char * cp; - - - offset = begin & ~uspi->s_fmask; - begin >>= uspi->s_fshift; - for (;;) { - if ((offset + size) < uspi->s_fsize) - rest = size; - else - rest = uspi->s_fsize - offset; - size -= rest; - cp = ubh->bh[begin]->b_data + offset; - while ((table[*cp++] & mask) == 0 && --rest); - if (rest || !size) - break; - begin++; - offset = 0; - } - return (size + rest); -} diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index a56cec3be5f0..9a8f48bae956 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -1023,11 +1023,12 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *vfat_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int vfat_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, + mnt); } static struct file_system_type vfat_fs_type = { diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index bac27d66151d..26b364c9d62c 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,6 +1,5 @@ config XFS_FS tristate "XFS filesystem support" - select EXPORTFS if NFSD!=n help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can @@ -18,11 +17,6 @@ config XFS_FS system of your root partition is compiled as a module, you'll need to use an initial ramdisk (initrd) to boot. -config XFS_EXPORT - bool - depends on XFS_FS && EXPORTFS - default y - config XFS_QUOTA bool "XFS Quota support" depends on XFS_FS @@ -65,18 +59,19 @@ config XFS_POSIX_ACL If you don't know what Access Control Lists are, say N. config XFS_RT - bool "XFS Realtime support (EXPERIMENTAL)" - depends on XFS_FS && EXPERIMENTAL + bool "XFS Realtime subvolume support" + depends on XFS_FS help If you say Y here you will be able to mount and use XFS filesystems - which contain a realtime subvolume. The realtime subvolume is a - separate area of disk space where only file data is stored. The - realtime subvolume is designed to provide very deterministic - data rates suitable for media streaming applications. - - See the xfs man page in section 5 for a bit more information. + which contain a realtime subvolume. The realtime subvolume is a + separate area of disk space where only file data is stored. It was + originally designed to provide deterministic data rates suitable + for media streaming applications, but is also useful as a generic + mechanism for ensuring data and metadata/log I/Os are completely + separated. Regular file I/Os are isolated to a separate device + from all other requests, and this can be done quite transparently + to applications via the inherit-realtime directory inode flag. - This feature is unsupported at this time, is not yet fully - functional, and may cause serious problems. + See the xfs man page in section 5 for additional information. If unsure, say N. diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index 5d73eaa1971f..9e7f85986d0d 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -59,7 +59,6 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o -xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o xfs-y += xfs_alloc.o \ @@ -73,14 +72,12 @@ xfs-y += xfs_alloc.o \ xfs_btree.o \ xfs_buf_item.o \ xfs_da_btree.o \ - xfs_dir.o \ xfs_dir2.o \ xfs_dir2_block.o \ xfs_dir2_data.o \ xfs_dir2_leaf.o \ xfs_dir2_node.o \ xfs_dir2_sf.o \ - xfs_dir_leaf.o \ xfs_error.o \ xfs_extfree_item.o \ xfs_fsops.o \ @@ -117,6 +114,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ kmem.o \ xfs_aops.o \ xfs_buf.o \ + xfs_export.o \ xfs_file.o \ xfs_fs_subr.o \ xfs_globals.o \ diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 2cfd33d4d8aa..939bd84bc7ee 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -23,42 +23,6 @@ #include <linux/mm.h> /* - * Process flags handling - */ - -#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO) -#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) - -#define PFLAGS_SET_NOIO() do { \ - current->flags |= PF_NOIO; \ -} while (0) - -#define PFLAGS_CLEAR_NOIO() do { \ - current->flags &= ~PF_NOIO; \ -} while (0) - -/* these could be nested, so we save state */ -#define PFLAGS_SET_FSTRANS(STATEP) do { \ - *(STATEP) = current->flags; \ - current->flags |= PF_FSTRANS; \ -} while (0) - -#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \ - *(STATEP) = current->flags; \ - current->flags &= ~PF_FSTRANS; \ -} while (0) - -/* Restore the PF_FSTRANS state to what was saved in STATEP */ -#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \ - current->flags = ((current->flags & ~PF_FSTRANS) | \ - (*(STATEP) & PF_FSTRANS)); \ -} while (0) - -#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ - *(NSTATEP) = *(OSTATEP); \ -} while (0) - -/* * General memory allocation interfaces */ @@ -83,7 +47,7 @@ kmem_flags_convert(unsigned int __nocast flags) lflags = GFP_ATOMIC | __GFP_NOWARN; } else { lflags = GFP_KERNEL | __GFP_NOWARN; - if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) + if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) lflags &= ~__GFP_FS; } return lflags; diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h index 1b262b790d9c..32e1ce0f04c9 100644 --- a/fs/xfs/linux-2.6/mrlock.h +++ b/fs/xfs/linux-2.6/mrlock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -28,7 +28,7 @@ typedef struct { } mrlock_t; #define mrinit(mrp, name) \ - ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) ) + do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) #define mrlock_init(mrp, t,n,s) mrinit(mrp, n) #define mrfree(mrp) do { } while (0) #define mraccess(mrp) mraccessf(mrp, 0) diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h index 194a84490bd1..b25090094cca 100644 --- a/fs/xfs/linux-2.6/sema.h +++ b/fs/xfs/linux-2.6/sema.h @@ -34,20 +34,21 @@ typedef struct semaphore sema_t; #define initnsema(sp, val, name) sema_init(sp, val) #define psema(sp, b) down(sp) #define vsema(sp) up(sp) -#define valusema(sp) (atomic_read(&(sp)->count)) -#define freesema(sema) +#define freesema(sema) do { } while (0) + +static inline int issemalocked(sema_t *sp) +{ + return down_trylock(sp) || (up(sp), 0); +} /* * Map cpsema (try to get the sema) to down_trylock. We need to switch * the return values since cpsema returns 1 (acquired) 0 (failed) and * down_trylock returns the reverse 0 (acquired) 1 (failed). */ - -#define cpsema(sp) (down_trylock(sp) ? 0 : 1) - -/* - * Didn't do cvsema(sp). Not sure how to map this to up/down/... - * It does a vsema if the values is < 0 other wise nothing. - */ +static inline int cpsema(sema_t *sp) +{ + return down_trylock(sp) ? 0 : 1; +} #endif /* __XFS_SUPPORT_SEMA_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 4d191ef39b67..3e807b828e22 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -21,7 +21,6 @@ #include "xfs_inum.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_trans.h" #include "xfs_dmapi.h" @@ -29,7 +28,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -76,7 +74,7 @@ xfs_page_trace( int mask) { xfs_inode_t *ip; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); loff_t isize = i_size_read(inode); loff_t offset = page_offset(page); int delalloc = -1, unmapped = -1, unwritten = -1; @@ -136,9 +134,10 @@ xfs_destroy_ioend( for (bh = ioend->io_buffer_head; bh; bh = next) { next = bh->b_private; - bh->b_end_io(bh, ioend->io_uptodate); + bh->b_end_io(bh, !ioend->io_error); } - + if (unlikely(ioend->io_error)) + vn_ioerror(ioend->io_vnode, ioend->io_error, __FILE__,__LINE__); vn_iowake(ioend->io_vnode); mempool_free(ioend, xfs_ioend_pool); } @@ -180,13 +179,12 @@ xfs_end_bio_unwritten( void *data) { xfs_ioend_t *ioend = data; - vnode_t *vp = ioend->io_vnode; + bhv_vnode_t *vp = ioend->io_vnode; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; - int error; - if (ioend->io_uptodate) - VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + if (likely(!ioend->io_error)) + bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL); xfs_destroy_ioend(ioend); } @@ -211,7 +209,7 @@ xfs_alloc_ioend( * all the I/O from calling the completion routine too early. */ atomic_set(&ioend->io_remaining, 1); - ioend->io_uptodate = 1; /* cleared if any I/O fails */ + ioend->io_error = 0; ioend->io_list = NULL; ioend->io_type = type; ioend->io_vnode = vn_from_inode(inode); @@ -239,10 +237,10 @@ xfs_map_blocks( xfs_iomap_t *mapp, int flags) { - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); int error, nmaps = 1; - VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); + error = bhv_vop_bmap(vp, offset, count, flags, mapp, &nmaps); if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) VMODIFY(vp); return -error; @@ -271,16 +269,14 @@ xfs_end_bio( if (bio->bi_size) return 1; - ASSERT(ioend); ASSERT(atomic_read(&bio->bi_cnt) >= 1); + ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; /* Toss bio and pass work off to an xfsdatad thread */ - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - ioend->io_uptodate = 0; bio->bi_private = NULL; bio->bi_end_io = NULL; - bio_put(bio); + xfs_finish_ioend(ioend); return 0; } @@ -1127,7 +1123,7 @@ xfs_vm_writepage( * then mark the page dirty again and leave the page * as is. */ - if (PFLAGS_TEST_FSTRANS() && need_trans) + if (current_test_flags(PF_FSTRANS) && need_trans) goto out_fail; /* @@ -1158,6 +1154,18 @@ out_unlock: return error; } +STATIC int +xfs_vm_writepages( + struct address_space *mapping, + struct writeback_control *wbc) +{ + struct bhv_vnode *vp = vn_from_inode(mapping->host); + + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); + return generic_writepages(mapping, wbc); +} + /* * Called to move a page into cleanable state - and from there * to be released. Possibly the page is already clean. We always @@ -1204,7 +1212,7 @@ xfs_vm_releasepage( /* If we are already inside a transaction or the thread cannot * do I/O, we cannot release this page. */ - if (PFLAGS_TEST_FSTRANS()) + if (current_test_flags(PF_FSTRANS)) return 0; /* @@ -1231,7 +1239,7 @@ __xfs_get_blocks( int direct, bmapi_flags_t flags) { - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); xfs_iomap_t iomap; xfs_off_t offset; ssize_t size; @@ -1241,8 +1249,8 @@ __xfs_get_blocks( offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); size = bh_result->b_size; - VOP_BMAP(vp, offset, size, - create ? flags : BMAPI_READ, &iomap, &niomap, error); + error = bhv_vop_bmap(vp, offset, size, + create ? flags : BMAPI_READ, &iomap, &niomap); if (error) return -error; if (niomap == 0) @@ -1370,13 +1378,13 @@ xfs_vm_direct_IO( { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); xfs_iomap_t iomap; int maps = 1; int error; ssize_t ret; - VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); + error = bhv_vop_bmap(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps); if (error) return -error; @@ -1409,14 +1417,12 @@ xfs_vm_bmap( sector_t block) { struct inode *inode = (struct inode *)mapping->host; - vnode_t *vp = vn_from_inode(inode); - int error; + bhv_vnode_t *vp = vn_from_inode(inode); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - - VOP_RWLOCK(vp, VRWLOCK_READ); - VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); - VOP_RWUNLOCK(vp, VRWLOCK_READ); + bhv_vop_rwlock(vp, VRWLOCK_READ); + bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF); + bhv_vop_rwunlock(vp, VRWLOCK_READ); return generic_block_bmap(mapping, block, xfs_get_blocks); } @@ -1452,6 +1458,7 @@ struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, + .writepages = xfs_vm_writepages, .sync_page = block_sync_page, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 60716543c68b..706d8c781b8a 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 Silicon Graphics, Inc. + * Copyright (c) 2005-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -30,9 +30,9 @@ typedef void (*xfs_ioend_func_t)(void *); typedef struct xfs_ioend { struct xfs_ioend *io_list; /* next ioend in chain */ unsigned int io_type; /* delalloc / unwritten */ - unsigned int io_uptodate; /* I/O status register */ + int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ - struct vnode *io_vnode; /* file being written to */ + struct bhv_vnode *io_vnode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */ size_t io_size; /* size of the extent */ @@ -43,4 +43,4 @@ typedef struct xfs_ioend { extern struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); -#endif /* __XFS_IOPS_H__ */ +#endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index b768ea910bbe..5fb75d9151f2 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -21,7 +21,6 @@ #include "xfs_log.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_mount.h" #include "xfs_export.h" @@ -97,7 +96,7 @@ xfs_fs_encode_fh( int len; int is64 = 0; #if XFS_BIG_INUMS - vfs_t *vfs = vfs_from_sb(inode->i_sb); + bhv_vfs_t *vfs = vfs_from_sb(inode->i_sb); if (!(vfs->vfs_flag & VFS_32BITINODES)) { /* filesystem may contain 64bit inode numbers */ @@ -136,13 +135,13 @@ xfs_fs_get_dentry( struct super_block *sb, void *data) { - vnode_t *vp; + bhv_vnode_t *vp; struct inode *inode; struct dentry *result; - vfs_t *vfsp = vfs_from_sb(sb); + bhv_vfs_t *vfsp = vfs_from_sb(sb); int error; - VFS_VGET(vfsp, &vp, (fid_t *)data, error); + error = bhv_vfs_vget(vfsp, &vp, (fid_t *)data); if (error || vp == NULL) return ERR_PTR(-ESTALE) ; @@ -160,12 +159,12 @@ xfs_fs_get_parent( struct dentry *child) { int error; - vnode_t *vp, *cvp; + bhv_vnode_t *vp, *cvp; struct dentry *parent; cvp = NULL; vp = vn_from_inode(child->d_inode); - VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); + error = bhv_vop_lookup(vp, &dotdot, &cvp, 0, NULL, NULL); if (unlikely(error)) return ERR_PTR(-error); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index c847416f6d10..3d4f6dff2113 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -21,7 +21,6 @@ #include "xfs_inum.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_trans.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -58,15 +56,12 @@ __xfs_file_read( { struct iovec iov = {buf, count}; struct file *file = iocb->ki_filp; - vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); - ssize_t rval; + bhv_vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); BUG_ON(iocb->ki_pos != pos); - if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); - return rval; + return bhv_vop_read(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); } STATIC ssize_t @@ -100,15 +95,12 @@ __xfs_file_write( struct iovec iov = {(void __user *)buf, count}; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - vnode_t *vp = vn_from_inode(inode); - ssize_t rval; + bhv_vnode_t *vp = vn_from_inode(inode); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - - VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); - return rval; + return bhv_vop_write(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); } STATIC ssize_t @@ -140,7 +132,7 @@ __xfs_file_readv( loff_t *ppos) { struct inode *inode = file->f_mapping->host; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); struct kiocb kiocb; ssize_t rval; @@ -149,7 +141,8 @@ __xfs_file_readv( if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + rval = bhv_vop_read(vp, &kiocb, iov, nr_segs, + &kiocb.ki_pos, ioflags, NULL); *ppos = kiocb.ki_pos; return rval; @@ -184,7 +177,7 @@ __xfs_file_writev( loff_t *ppos) { struct inode *inode = file->f_mapping->host; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); struct kiocb kiocb; ssize_t rval; @@ -193,7 +186,8 @@ __xfs_file_writev( if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + rval = bhv_vop_write(vp, &kiocb, iov, nr_segs, + &kiocb.ki_pos, ioflags, NULL); *ppos = kiocb.ki_pos; return rval; @@ -227,11 +221,8 @@ xfs_file_sendfile( read_actor_t actor, void *target) { - vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); - ssize_t rval; - - VOP_SENDFILE(vp, filp, pos, 0, count, actor, target, NULL, rval); - return rval; + return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode), + filp, pos, 0, count, actor, target, NULL); } STATIC ssize_t @@ -242,11 +233,8 @@ xfs_file_sendfile_invis( read_actor_t actor, void *target) { - vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); - ssize_t rval; - - VOP_SENDFILE(vp, filp, pos, IO_INVIS, count, actor, target, NULL, rval); - return rval; + return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode), + filp, pos, IO_INVIS, count, actor, target, NULL); } STATIC ssize_t @@ -257,11 +245,8 @@ xfs_file_splice_read( size_t len, unsigned int flags) { - vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); - ssize_t rval; - - VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, 0, NULL, rval); - return rval; + return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode), + infilp, ppos, pipe, len, flags, 0, NULL); } STATIC ssize_t @@ -272,11 +257,9 @@ xfs_file_splice_read_invis( size_t len, unsigned int flags) { - vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); - ssize_t rval; - - VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, IO_INVIS, NULL, rval); - return rval; + return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode), + infilp, ppos, pipe, len, flags, IO_INVIS, + NULL); } STATIC ssize_t @@ -287,11 +270,8 @@ xfs_file_splice_write( size_t len, unsigned int flags) { - vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); - ssize_t rval; - - VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, 0, NULL, rval); - return rval; + return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode), + pipe, outfilp, ppos, len, flags, 0, NULL); } STATIC ssize_t @@ -302,11 +282,9 @@ xfs_file_splice_write_invis( size_t len, unsigned int flags) { - vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); - ssize_t rval; - - VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, IO_INVIS, NULL, rval); - return rval; + return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode), + pipe, outfilp, ppos, len, flags, IO_INVIS, + NULL); } STATIC int @@ -314,13 +292,18 @@ xfs_file_open( struct inode *inode, struct file *filp) { - vnode_t *vp = vn_from_inode(inode); - int error; - if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - VOP_OPEN(vp, NULL, error); - return -error; + return -bhv_vop_open(vn_from_inode(inode), NULL); +} + +STATIC int +xfs_file_close( + struct file *filp, + fl_owner_t id) +{ + return -bhv_vop_close(vn_from_inode(filp->f_dentry->d_inode), 0, + file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL); } STATIC int @@ -328,12 +311,11 @@ xfs_file_release( struct inode *inode, struct file *filp) { - vnode_t *vp = vn_from_inode(inode); - int error = 0; + bhv_vnode_t *vp = vn_from_inode(inode); if (vp) - VOP_RELEASE(vp, error); - return -error; + return -bhv_vop_release(vp); + return 0; } STATIC int @@ -342,15 +324,14 @@ xfs_file_fsync( struct dentry *dentry, int datasync) { - struct inode *inode = dentry->d_inode; - vnode_t *vp = vn_from_inode(inode); - int error; + bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); int flags = FSYNC_WAIT; if (datasync) flags |= FSYNC_DATA; - VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); - return -error; + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); + return -bhv_vop_fsync(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1); } #ifdef CONFIG_XFS_DMAPI @@ -361,16 +342,11 @@ xfs_vm_nopage( int *type) { struct inode *inode = area->vm_file->f_dentry->d_inode; - vnode_t *vp = vn_from_inode(inode); - xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); - int error; + bhv_vnode_t *vp = vn_from_inode(inode); ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); - - error = XFS_SEND_MMAP(mp, area, 0); - if (error) + if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0)) return NULL; - return filemap_nopage(area, address, type); } #endif /* CONFIG_XFS_DMAPI */ @@ -382,7 +358,7 @@ xfs_file_readdir( filldir_t filldir) { int error = 0; - vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); + bhv_vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); uio_t uio; iovec_t iov; int eof = 0; @@ -417,7 +393,7 @@ xfs_file_readdir( start_offset = uio.uio_offset; - VOP_READDIR(vp, &uio, NULL, &eof, error); + error = bhv_vop_readdir(vp, &uio, NULL, &eof); if ((uio.uio_offset == start_offset) || error) { size = 0; break; @@ -456,38 +432,28 @@ xfs_file_mmap( struct file *filp, struct vm_area_struct *vma) { - struct inode *ip = filp->f_dentry->d_inode; - vnode_t *vp = vn_from_inode(ip); - vattr_t vattr; - int error; - vma->vm_ops = &xfs_file_vm_ops; #ifdef CONFIG_XFS_DMAPI - if (vp->v_vfsp->vfs_flag & VFS_DMI) { + if (vn_from_inode(filp->f_dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) vma->vm_ops = &xfs_dmapi_file_vm_ops; - } #endif /* CONFIG_XFS_DMAPI */ - vattr.va_mask = XFS_AT_UPDATIME; - VOP_SETATTR(vp, &vattr, XFS_AT_UPDATIME, NULL, error); - if (likely(!error)) - __vn_revalidate(vp, &vattr); /* update flags */ + file_accessed(filp); return 0; } - STATIC long xfs_file_ioctl( struct file *filp, unsigned int cmd, - unsigned long arg) + unsigned long p) { int error; struct inode *inode = filp->f_dentry->d_inode; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); - VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error); + error = bhv_vop_ioctl(vp, inode, filp, 0, cmd, (void __user *)p); VMODIFY(vp); /* NOTE: some of the ioctl's return positive #'s as a @@ -503,13 +469,13 @@ STATIC long xfs_file_ioctl_invis( struct file *filp, unsigned int cmd, - unsigned long arg) + unsigned long p) { - struct inode *inode = filp->f_dentry->d_inode; - vnode_t *vp = vn_from_inode(inode); int error; + struct inode *inode = filp->f_dentry->d_inode; + bhv_vnode_t *vp = vn_from_inode(inode); - VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); + error = bhv_vop_ioctl(vp, inode, filp, IO_INVIS, cmd, (void __user *)p); VMODIFY(vp); /* NOTE: some of the ioctl's return positive #'s as a @@ -528,7 +494,7 @@ xfs_vm_mprotect( struct vm_area_struct *vma, unsigned int newflags) { - vnode_t *vp = vn_from_inode(vma->vm_file->f_dentry->d_inode); + bhv_vnode_t *vp = vn_from_inode(vma->vm_file->f_dentry->d_inode); int error = 0; if (vp->v_vfsp->vfs_flag & VFS_DMI) { @@ -554,24 +520,19 @@ STATIC int xfs_file_open_exec( struct inode *inode) { - vnode_t *vp = vn_from_inode(inode); - xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); - int error = 0; - xfs_inode_t *ip; + bhv_vnode_t *vp = vn_from_inode(inode); - if (vp->v_vfsp->vfs_flag & VFS_DMI) { - ip = xfs_vtoi(vp); - if (!ip) { - error = -EINVAL; - goto open_exec_out; - } - if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { - error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, + if (unlikely(vp->v_vfsp->vfs_flag & VFS_DMI)) { + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + xfs_inode_t *ip = xfs_vtoi(vp); + + if (!ip) + return -EINVAL; + if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) + return -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL); - } } -open_exec_out: - return error; + return 0; } #endif /* HAVE_FOP_OPEN_EXEC */ @@ -592,6 +553,7 @@ const struct file_operations xfs_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, + .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, #ifdef HAVE_FOP_OPEN_EXEC @@ -616,6 +578,7 @@ const struct file_operations xfs_invis_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, + .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, }; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 575f2a790f31..dc0562828e76 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -15,40 +15,12 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - #include "xfs.h" -/* - * Stub for no-op vnode operations that return error status. - */ -int -fs_noerr(void) -{ - return 0; -} +int fs_noerr(void) { return 0; } +int fs_nosys(void) { return ENOSYS; } +void fs_noval(void) { return; } -/* - * Operation unsupported under this file system. - */ -int -fs_nosys(void) -{ - return ENOSYS; -} - -/* - * Stub for inactive, strategy, and read/write lock/unlock. Does nothing. - */ -/* ARGSUSED */ -void -fs_noval(void) -{ -} - -/* - * vnode pcache layer for vnode_tosspages. - * 'last' parameter unused but left in for IRIX compatibility - */ void fs_tosspages( bhv_desc_t *bdp, @@ -56,18 +28,13 @@ fs_tosspages( xfs_off_t last, int fiopt) { - vnode_t *vp = BHV_TO_VNODE(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); struct inode *ip = vn_to_inode(vp); if (VN_CACHED(vp)) truncate_inode_pages(ip->i_mapping, first); } - -/* - * vnode pcache layer for vnode_flushinval_pages. - * 'last' parameter unused but left in for IRIX compatibility - */ void fs_flushinval_pages( bhv_desc_t *bdp, @@ -75,20 +42,17 @@ fs_flushinval_pages( xfs_off_t last, int fiopt) { - vnode_t *vp = BHV_TO_VNODE(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); struct inode *ip = vn_to_inode(vp); if (VN_CACHED(vp)) { + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); filemap_write_and_wait(ip->i_mapping); - truncate_inode_pages(ip->i_mapping, first); } } -/* - * vnode pcache layer for vnode_flush_pages. - * 'last' parameter unused but left in for IRIX compatibility - */ int fs_flush_pages( bhv_desc_t *bdp, @@ -97,15 +61,16 @@ fs_flush_pages( uint64_t flags, int fiopt) { - vnode_t *vp = BHV_TO_VNODE(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); struct inode *ip = vn_to_inode(vp); - if (VN_CACHED(vp)) { + if (VN_DIRTY(vp)) { + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); filemap_fdatawrite(ip->i_mapping); if (flags & XFS_B_ASYNC) return 0; filemap_fdatawait(ip->i_mapping); } - return 0; } diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 6e8085f34635..6c162c3dde7e 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -45,6 +45,7 @@ xfs_param_t xfs_params = { .xfs_buf_age = { 1*100, 15*100, 7200*100}, .inherit_nosym = { 0, 0, 1 }, .rotorstep = { 1, 1, 255 }, + .inherit_nodfrg = { 0, 1, 1 }, }; /* diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 84478491609b..6e52a5dd38d8 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -31,7 +30,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_attr_sf.h" #include "xfs_dir2_sf.h" #include "xfs_dinode.h" @@ -78,7 +76,7 @@ xfs_find_handle( xfs_handle_t handle; xfs_fsop_handlereq_t hreq; struct inode *inode; - struct vnode *vp; + bhv_vnode_t *vp; if (copy_from_user(&hreq, arg, sizeof(hreq))) return -XFS_ERROR(EFAULT); @@ -192,7 +190,7 @@ xfs_vget_fsop_handlereq( xfs_mount_t *mp, struct inode *parinode, /* parent inode pointer */ xfs_fsop_handlereq_t *hreq, - vnode_t **vp, + bhv_vnode_t **vp, struct inode **inode) { void __user *hanp; @@ -202,7 +200,7 @@ xfs_vget_fsop_handlereq( xfs_handle_t handle; xfs_inode_t *ip; struct inode *inodep; - vnode_t *vpp; + bhv_vnode_t *vpp; xfs_ino_t ino; __u32 igen; int error; @@ -277,7 +275,7 @@ xfs_open_by_handle( struct file *filp; struct inode *inode; struct dentry *dentry; - vnode_t *vp; + bhv_vnode_t *vp; xfs_fsop_handlereq_t hreq; if (!capable(CAP_SYS_ADMIN)) @@ -362,7 +360,7 @@ xfs_readlink_by_handle( struct uio auio; struct inode *inode; xfs_fsop_handlereq_t hreq; - vnode_t *vp; + bhv_vnode_t *vp; __u32 olen; if (!capable(CAP_SYS_ADMIN)) @@ -393,9 +391,11 @@ xfs_readlink_by_handle( auio.uio_segflg = UIO_USERSPACE; auio.uio_resid = olen; - VOP_READLINK(vp, &auio, IO_INVIS, NULL, error); - + error = bhv_vop_readlink(vp, &auio, IO_INVIS, NULL); VN_RELE(vp); + if (error) + return -error; + return (olen - auio.uio_resid); } @@ -411,7 +411,7 @@ xfs_fssetdm_by_handle( xfs_fsop_setdm_handlereq_t dmhreq; struct inode *inode; bhv_desc_t *bdp; - vnode_t *vp; + bhv_vnode_t *vp; if (!capable(CAP_MKNOD)) return -XFS_ERROR(EPERM); @@ -452,7 +452,7 @@ xfs_attrlist_by_handle( attrlist_cursor_kern_t *cursor; xfs_fsop_attrlist_handlereq_t al_hreq; struct inode *inode; - vnode_t *vp; + bhv_vnode_t *vp; char *kbuf; if (!capable(CAP_SYS_ADMIN)) @@ -472,8 +472,8 @@ xfs_attrlist_by_handle( goto out_vn_rele; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags, - cursor, NULL, error); + error = bhv_vop_attr_list(vp, kbuf, al_hreq.buflen, al_hreq.flags, + cursor, NULL); if (error) goto out_kfree; @@ -490,7 +490,7 @@ xfs_attrlist_by_handle( STATIC int xfs_attrmulti_attr_get( - struct vnode *vp, + bhv_vnode_t *vp, char *name, char __user *ubuf, __uint32_t *len, @@ -505,7 +505,7 @@ xfs_attrmulti_attr_get( if (!kbuf) return ENOMEM; - VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error); + error = bhv_vop_attr_get(vp, name, kbuf, len, flags, NULL); if (error) goto out_kfree; @@ -519,7 +519,7 @@ xfs_attrmulti_attr_get( STATIC int xfs_attrmulti_attr_set( - struct vnode *vp, + bhv_vnode_t *vp, char *name, const char __user *ubuf, __uint32_t len, @@ -542,7 +542,7 @@ xfs_attrmulti_attr_set( if (copy_from_user(kbuf, ubuf, len)) goto out_kfree; - VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error); + error = bhv_vop_attr_set(vp, name, kbuf, len, flags, NULL); out_kfree: kfree(kbuf); @@ -551,20 +551,15 @@ xfs_attrmulti_attr_set( STATIC int xfs_attrmulti_attr_remove( - struct vnode *vp, + bhv_vnode_t *vp, char *name, __uint32_t flags) { - int error; - - if (IS_RDONLY(&vp->v_inode)) return -EROFS; if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) return EPERM; - - VOP_ATTR_REMOVE(vp, name, flags, NULL, error); - return error; + return bhv_vop_attr_remove(vp, name, flags, NULL); } STATIC int @@ -578,7 +573,7 @@ xfs_attrmulti_by_handle( xfs_attr_multiop_t *ops; xfs_fsop_attrmulti_handlereq_t am_hreq; struct inode *inode; - vnode_t *vp; + bhv_vnode_t *vp; unsigned int i, size; char *attr_name; @@ -658,7 +653,7 @@ xfs_attrmulti_by_handle( STATIC int xfs_ioc_space( bhv_desc_t *bdp, - vnode_t *vp, + bhv_vnode_t *vp, struct file *filp, int flags, unsigned int cmd, @@ -682,7 +677,7 @@ xfs_ioc_fsgeometry( STATIC int xfs_ioc_xattr( - vnode_t *vp, + bhv_vnode_t *vp, xfs_inode_t *ip, struct file *filp, unsigned int cmd, @@ -711,7 +706,7 @@ xfs_ioctl( void __user *arg) { int error; - vnode_t *vp; + bhv_vnode_t *vp; xfs_inode_t *ip; xfs_mount_t *mp; @@ -962,7 +957,7 @@ xfs_ioctl( STATIC int xfs_ioc_space( bhv_desc_t *bdp, - vnode_t *vp, + bhv_vnode_t *vp, struct file *filp, int ioflags, unsigned int cmd, @@ -1153,14 +1148,14 @@ xfs_di2lxflags( STATIC int xfs_ioc_xattr( - vnode_t *vp, + bhv_vnode_t *vp, xfs_inode_t *ip, struct file *filp, unsigned int cmd, void __user *arg) { struct fsxattr fa; - struct vattr *vattr; + struct bhv_vattr *vattr; int error = 0; int attr_flags; unsigned int flags; @@ -1173,7 +1168,7 @@ xfs_ioc_xattr( case XFS_IOC_FSGETXATTR: { vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ XFS_AT_NEXTENTS | XFS_AT_PROJID; - VOP_GETATTR(vp, vattr, 0, NULL, error); + error = bhv_vop_getattr(vp, vattr, 0, NULL); if (unlikely(error)) { error = -error; break; @@ -1206,7 +1201,7 @@ xfs_ioc_xattr( vattr->va_extsize = fa.fsx_extsize; vattr->va_projid = fa.fsx_projid; - VOP_SETATTR(vp, vattr, attr_flags, NULL, error); + error = bhv_vop_setattr(vp, vattr, attr_flags, NULL); if (likely(!error)) __vn_revalidate(vp, vattr); /* update flags */ error = -error; @@ -1216,7 +1211,7 @@ xfs_ioc_xattr( case XFS_IOC_FSGETXATTRA: { vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ XFS_AT_ANEXTENTS | XFS_AT_PROJID; - VOP_GETATTR(vp, vattr, 0, NULL, error); + error = bhv_vop_getattr(vp, vattr, 0, NULL); if (unlikely(error)) { error = -error; break; @@ -1262,7 +1257,7 @@ xfs_ioc_xattr( vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - VOP_SETATTR(vp, vattr, attr_flags, NULL, error); + error = bhv_vop_setattr(vp, vattr, attr_flags, NULL); if (likely(!error)) __vn_revalidate(vp, vattr); /* update flags */ error = -error; diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 251bfe451a3f..601f01c92f7f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -114,7 +114,7 @@ xfs_compat_ioctl( unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); int error; switch (cmd) { @@ -193,7 +193,7 @@ xfs_compat_ioctl( return -ENOIOCTLCMD; } - VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error); + error = bhv_vop_ioctl(vp, inode, file, mode, cmd, (void __user *)arg); VMODIFY(vp); return error; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2e2e275c786f..12810baeb5d4 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -61,7 +59,7 @@ */ xfs_inode_t * xfs_vtoi( - struct vnode *vp) + bhv_vnode_t *vp) { bhv_desc_t *bdp; @@ -80,7 +78,7 @@ void xfs_synchronize_atime( xfs_inode_t *ip) { - vnode_t *vp; + bhv_vnode_t *vp; vp = XFS_ITOV_NULL(ip); if (vp) { @@ -200,14 +198,10 @@ xfs_ichgtime_fast( STATIC void xfs_validate_fields( struct inode *ip, - struct vattr *vattr) + bhv_vattr_t *vattr) { - vnode_t *vp = vn_from_inode(ip); - int error; - vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; - VOP_GETATTR(vp, vattr, ATTR_LAZY, NULL, error); - if (likely(!error)) { + if (!bhv_vop_getattr(vn_from_inode(ip), vattr, ATTR_LAZY, NULL)) { ip->i_nlink = vattr->va_nlink; ip->i_blocks = vattr->va_nblocks; @@ -225,7 +219,7 @@ xfs_validate_fields( */ STATIC int xfs_init_security( - struct vnode *vp, + bhv_vnode_t *vp, struct inode *dir) { struct inode *ip = vn_to_inode(vp); @@ -241,7 +235,7 @@ xfs_init_security( return -error; } - VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error); + error = bhv_vop_attr_set(vp, name, value, length, ATTR_SECURE, NULL); if (!error) VMODIFY(vp); @@ -264,13 +258,12 @@ xfs_has_fs_struct(struct task_struct *task) STATIC inline void xfs_cleanup_inode( - vnode_t *dvp, - vnode_t *vp, + bhv_vnode_t *dvp, + bhv_vnode_t *vp, struct dentry *dentry, int mode) { struct dentry teardown = {}; - int error; /* Oh, the horror. * If we can't add the ACL or we fail in @@ -281,9 +274,9 @@ xfs_cleanup_inode( teardown.d_name = dentry->d_name; if (S_ISDIR(mode)) - VOP_RMDIR(dvp, &teardown, NULL, error); + bhv_vop_rmdir(dvp, &teardown, NULL); else - VOP_REMOVE(dvp, &teardown, NULL, error); + bhv_vop_remove(dvp, &teardown, NULL); VN_RELE(vp); } @@ -295,8 +288,8 @@ xfs_vn_mknod( dev_t rdev) { struct inode *ip; - vattr_t vattr = { 0 }; - vnode_t *vp = NULL, *dvp = vn_from_inode(dir); + bhv_vattr_t vattr = { 0 }; + bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir); xfs_acl_t *default_acl = NULL; attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; int error; @@ -330,10 +323,10 @@ xfs_vn_mknod( vattr.va_mask |= XFS_AT_RDEV; /*FALLTHROUGH*/ case S_IFREG: - VOP_CREATE(dvp, dentry, &vattr, &vp, NULL, error); + error = bhv_vop_create(dvp, dentry, &vattr, &vp, NULL); break; case S_IFDIR: - VOP_MKDIR(dvp, dentry, &vattr, &vp, NULL, error); + error = bhv_vop_mkdir(dvp, dentry, &vattr, &vp, NULL); break; default: error = EINVAL; @@ -396,14 +389,14 @@ xfs_vn_lookup( struct dentry *dentry, struct nameidata *nd) { - struct vnode *vp = vn_from_inode(dir), *cvp; + bhv_vnode_t *vp = vn_from_inode(dir), *cvp; int error; if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error); - if (error) { + error = bhv_vop_lookup(vp, dentry, &cvp, 0, NULL, NULL); + if (unlikely(error)) { if (unlikely(error != ENOENT)) return ERR_PTR(-error); d_add(dentry, NULL); @@ -420,9 +413,9 @@ xfs_vn_link( struct dentry *dentry) { struct inode *ip; /* inode of guy being linked to */ - vnode_t *tdvp; /* target directory for new name/link */ - vnode_t *vp; /* vp of name being linked */ - vattr_t vattr; + bhv_vnode_t *tdvp; /* target directory for new name/link */ + bhv_vnode_t *vp; /* vp of name being linked */ + bhv_vattr_t vattr; int error; ip = old_dentry->d_inode; /* inode being linked to */ @@ -432,7 +425,7 @@ xfs_vn_link( tdvp = vn_from_inode(dir); vp = vn_from_inode(ip); - VOP_LINK(tdvp, vp, dentry, NULL, error); + error = bhv_vop_link(tdvp, vp, dentry, NULL); if (likely(!error)) { VMODIFY(tdvp); VN_HOLD(vp); @@ -448,14 +441,14 @@ xfs_vn_unlink( struct dentry *dentry) { struct inode *inode; - vnode_t *dvp; /* directory containing name to remove */ - vattr_t vattr; + bhv_vnode_t *dvp; /* directory containing name to remove */ + bhv_vattr_t vattr; int error; inode = dentry->d_inode; dvp = vn_from_inode(dir); - VOP_REMOVE(dvp, dentry, NULL, error); + error = bhv_vop_remove(dvp, dentry, NULL); if (likely(!error)) { xfs_validate_fields(dir, &vattr); /* size needs update */ xfs_validate_fields(inode, &vattr); @@ -470,27 +463,26 @@ xfs_vn_symlink( const char *symname) { struct inode *ip; - vattr_t vattr = { 0 }; - vnode_t *dvp; /* directory containing name of symlink */ - vnode_t *cvp; /* used to lookup symlink to put in dentry */ + bhv_vattr_t va = { 0 }; + bhv_vnode_t *dvp; /* directory containing name of symlink */ + bhv_vnode_t *cvp; /* used to lookup symlink to put in dentry */ int error; dvp = vn_from_inode(dir); cvp = NULL; - vattr.va_mode = S_IFLNK | + va.va_mode = S_IFLNK | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); - vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; - error = 0; - VOP_SYMLINK(dvp, dentry, &vattr, (char *)symname, &cvp, NULL, error); + error = bhv_vop_symlink(dvp, dentry, &va, (char *)symname, &cvp, NULL); if (likely(!error && cvp)) { error = xfs_init_security(cvp, dir); if (likely(!error)) { ip = vn_to_inode(cvp); d_instantiate(dentry, ip); - xfs_validate_fields(dir, &vattr); - xfs_validate_fields(ip, &vattr); + xfs_validate_fields(dir, &va); + xfs_validate_fields(ip, &va); } else { xfs_cleanup_inode(dvp, cvp, dentry, 0); } @@ -504,11 +496,11 @@ xfs_vn_rmdir( struct dentry *dentry) { struct inode *inode = dentry->d_inode; - vnode_t *dvp = vn_from_inode(dir); - vattr_t vattr; + bhv_vnode_t *dvp = vn_from_inode(dir); + bhv_vattr_t vattr; int error; - VOP_RMDIR(dvp, dentry, NULL, error); + error = bhv_vop_rmdir(dvp, dentry, NULL); if (likely(!error)) { xfs_validate_fields(inode, &vattr); xfs_validate_fields(dir, &vattr); @@ -524,15 +516,15 @@ xfs_vn_rename( struct dentry *ndentry) { struct inode *new_inode = ndentry->d_inode; - vnode_t *fvp; /* from directory */ - vnode_t *tvp; /* target directory */ - vattr_t vattr; + bhv_vnode_t *fvp; /* from directory */ + bhv_vnode_t *tvp; /* target directory */ + bhv_vattr_t vattr; int error; fvp = vn_from_inode(odir); tvp = vn_from_inode(ndir); - VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); + error = bhv_vop_rename(fvp, odentry, tvp, ndentry, NULL); if (likely(!error)) { if (new_inode) xfs_validate_fields(new_inode, &vattr); @@ -553,7 +545,7 @@ xfs_vn_follow_link( struct dentry *dentry, struct nameidata *nd) { - vnode_t *vp; + bhv_vnode_t *vp; uio_t *uio; iovec_t iov; int error; @@ -586,8 +578,8 @@ xfs_vn_follow_link( uio->uio_resid = MAXPATHLEN; uio->uio_iovcnt = 1; - VOP_READLINK(vp, uio, 0, NULL, error); - if (error) { + error = bhv_vop_readlink(vp, uio, 0, NULL); + if (unlikely(error)) { kfree(link); link = ERR_PTR(-error); } else { @@ -618,12 +610,7 @@ xfs_vn_permission( int mode, struct nameidata *nd) { - vnode_t *vp = vn_from_inode(inode); - int error; - - mode <<= 6; /* convert from linux to vnode access bits */ - VOP_ACCESS(vp, mode, NULL, error); - return -error; + return -bhv_vop_access(vn_from_inode(inode), mode << 6, NULL); } #else #define xfs_vn_permission NULL @@ -636,14 +623,14 @@ xfs_vn_getattr( struct kstat *stat) { struct inode *inode = dentry->d_inode; - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); int error = 0; if (unlikely(vp->v_flag & VMODIFIED)) error = vn_revalidate(vp); if (!error) generic_fillattr(inode, stat); - return 0; + return -error; } STATIC int @@ -653,8 +640,8 @@ xfs_vn_setattr( { struct inode *inode = dentry->d_inode; unsigned int ia_valid = attr->ia_valid; - vnode_t *vp = vn_from_inode(inode); - vattr_t vattr = { 0 }; + bhv_vnode_t *vp = vn_from_inode(inode); + bhv_vattr_t vattr = { 0 }; int flags = 0; int error; @@ -697,7 +684,7 @@ xfs_vn_setattr( flags |= ATTR_NONBLOCK; #endif - VOP_SETATTR(vp, &vattr, flags, NULL, error); + error = bhv_vop_setattr(vp, &vattr, flags, NULL); if (likely(!error)) __vn_revalidate(vp, &vattr); return -error; @@ -718,7 +705,7 @@ xfs_vn_setxattr( size_t size, int flags) { - vnode_t *vp = vn_from_inode(dentry->d_inode); + bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); char *attr = (char *)name; attrnames_t *namesp; int xflags = 0; @@ -748,7 +735,7 @@ xfs_vn_getxattr( void *data, size_t size) { - vnode_t *vp = vn_from_inode(dentry->d_inode); + bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); char *attr = (char *)name; attrnames_t *namesp; int xflags = 0; @@ -777,7 +764,7 @@ xfs_vn_listxattr( char *data, size_t size) { - vnode_t *vp = vn_from_inode(dentry->d_inode); + bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); int error, xflags = ATTR_KERNAMELS; ssize_t result; @@ -796,7 +783,7 @@ xfs_vn_removexattr( struct dentry *dentry, const char *name) { - vnode_t *vp = vn_from_inode(dentry->d_inode); + bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); char *attr = (char *)name; attrnames_t *namesp; int xflags = 0; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index e9fe43d74768..aa26ab906c88 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -134,14 +134,21 @@ BUFFER_FNS(PrivateStart, unwritten); #define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val #define xfs_rotorstep xfs_params.rotorstep.val +#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val -#ifndef raw_smp_processor_id -#define raw_smp_processor_id() smp_processor_id() -#endif -#define current_cpu() raw_smp_processor_id() +#define current_cpu() (raw_smp_processor_id()) #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) +#define current_set_flags(f) (current->flags |= (f)) +#define current_test_flags(f) (current->flags & (f)) +#define current_clear_flags(f) (current->flags & ~(f)) +#define current_set_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags |= (f)) +#define current_clear_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags &= ~(f)) +#define current_restore_flags_nested(sp, f) \ + (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) #define NBPP PAGE_SIZE #define DPPSHFT (PAGE_SHIFT - 9) @@ -187,25 +194,9 @@ BUFFER_FNS(PrivateStart, unwritten); /* bytes to clicks */ #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) -#ifndef ENOATTR #define ENOATTR ENODATA /* Attribute not found */ -#endif - -/* Note: EWRONGFS never visible outside the kernel */ -#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ - -/* - * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't - * return codes out of its known range in errno. - * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't - * conflict with any code we use already or any code a driver may use) - * XXX Some options (currently we do #2): - * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] - * 2/ 990 ["Unknown error 990"] - * 3/ EUCLEAN ["Structure needs cleaning"] - * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] - */ -#define EFSCORRUPTED 990 /* Filesystem is corrupted */ +#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define SYNCHRONIZE() barrier() #define __return_address __builtin_return_address(0) diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 67efe3308980..5d9cfd91ad08 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -206,7 +204,7 @@ xfs_read( xfs_fsize_t n; xfs_inode_t *ip; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; unsigned long seg; ip = XFS_BHVTOI(bdp); @@ -258,7 +256,7 @@ xfs_read( if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { - vrwlock_t locktype = VRWLOCK_READ; + bhv_vrwlock_t locktype = VRWLOCK_READ; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, @@ -271,7 +269,7 @@ xfs_read( } if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp))) - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(*offset)), + bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), -1, FI_REMAPF_LOCKED); xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, @@ -313,7 +311,7 @@ xfs_sendfile( if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && (!(ioflags & IO_INVIS))) { - vrwlock_t locktype = VRWLOCK_READ; + bhv_vrwlock_t locktype = VRWLOCK_READ; int error; error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), @@ -357,7 +355,7 @@ xfs_splice_read( if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && (!(ioflags & IO_INVIS))) { - vrwlock_t locktype = VRWLOCK_READ; + bhv_vrwlock_t locktype = VRWLOCK_READ; int error; error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), @@ -401,7 +399,7 @@ xfs_splice_write( if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) && (!(ioflags & IO_INVIS))) { - vrwlock_t locktype = VRWLOCK_WRITE; + bhv_vrwlock_t locktype = VRWLOCK_WRITE; int error; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp), @@ -458,7 +456,7 @@ xfs_zero_last_block( last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, - &nimaps, NULL); + &nimaps, NULL, NULL); if (error) { return error; } @@ -499,7 +497,7 @@ xfs_zero_last_block( int /* error (positive) */ xfs_zero_eof( - vnode_t *vp, + bhv_vnode_t *vp, xfs_iocore_t *io, xfs_off_t offset, /* starting I/O offset */ xfs_fsize_t isize, /* current inode size */ @@ -510,7 +508,6 @@ xfs_zero_eof( xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; - xfs_extlen_t buf_len_fsb; xfs_mount_t *mp = io->io_mount; int nimaps; int error = 0; @@ -556,7 +553,7 @@ xfs_zero_eof( nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, - 0, NULL, 0, &imap, &nimaps, NULL); + 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); @@ -579,16 +576,7 @@ xfs_zero_eof( } /* - * There are blocks in the range requested. - * Zero them a single write at a time. We actually - * don't zero the entire range returned if it is - * too big and simply loop around to get the rest. - * That is not the most efficient thing to do, but it - * is simple and this path should not be exercised often. - */ - buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, - mp->m_writeio_blocks << 8); - /* + * There are blocks we need to zero. * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ @@ -596,14 +584,13 @@ xfs_zero_eof( error = xfs_iozero(ip, XFS_FSB_TO_B(mp, start_zero_fsb), - XFS_FSB_TO_B(mp, buf_len_fsb), + XFS_FSB_TO_B(mp, imap.br_blockcount), end_size); - if (error) { goto out_lock; } - start_zero_fsb = imap.br_startoff + buf_len_fsb; + start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); @@ -637,11 +624,11 @@ xfs_write( ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; xfs_iocore_t *io; - vnode_t *vp; + bhv_vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; - vrwlock_t locktype; + bhv_vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; int need_i_mutex = 1, need_flush = 0; @@ -679,11 +666,11 @@ xfs_write( io = &xip->i_iocore; mp = io->io_mount; + vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE); + if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE); - if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? @@ -814,7 +801,7 @@ retry: if (need_flush) { xfs_inval_cached_trace(io, pos, -1, ctooff(offtoct(pos)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), + bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), -1, FI_REMAPF_LOCKED); } @@ -903,79 +890,9 @@ retry: /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { - /* - * If we're treating this as O_DSYNC and we have not updated the - * size, force the log. - */ - if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && - !(xip->i_update_size)) { - xfs_inode_log_item_t *iip = xip->i_itemp; - - /* - * If an allocation transaction occurred - * without extending the size, then we have to force - * the log up the proper point to ensure that the - * allocation is permanent. We can't count on - * the fact that buffered writes lock out direct I/O - * writes - the direct I/O write could have extended - * the size nontransactionally, then finished before - * we started. xfs_write_file will think that the file - * didn't grow but the update isn't safe unless the - * size change is logged. - * - * Force the log if we've committed a transaction - * against the inode or if someone else has and - * the commit record hasn't gone to disk (e.g. - * the inode is pinned). This guarantees that - * all changes affecting the inode are permanent - * when we return. - */ - if (iip && iip->ili_last_lsn) { - xfs_log_force(mp, iip->ili_last_lsn, - XFS_LOG_FORCE | XFS_LOG_SYNC); - } else if (xfs_ipincount(xip) > 0) { - xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE | XFS_LOG_SYNC); - } - - } else { - xfs_trans_t *tp; - - /* - * O_SYNC or O_DSYNC _with_ a size update are handled - * the same way. - * - * If the write was synchronous then we need to make - * sure that the inode modification time is permanent. - * We'll have updated the timestamp above, so here - * we use a synchronous transaction to log the inode. - * It's not fast, but it's necessary. - * - * If this a dsync write and the size got changed - * non-transactionally, then we need to ensure that - * the size change gets logged in a synchronous - * transaction. - */ - - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); - if ((error = xfs_trans_reserve(tp, 0, - XFS_SWRITE_LOG_RES(mp), - 0, 0, 0))) { - /* Transaction reserve failed */ - xfs_trans_cancel(tp, 0); - } else { - /* Transaction reserve successful */ - xfs_ilock(xip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, xip); - xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); - xfs_iunlock(xip, XFS_ILOCK_EXCL); - } - if (error) - goto out_unlock_internal; - } + error = xfs_write_sync_logforce(mp, xip); + if (error) + goto out_unlock_internal; xfs_rwunlock(bdp, locktype); if (need_i_mutex) diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index 8f4539952350..c77e62efb742 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -18,8 +18,8 @@ #ifndef __XFS_LRW_H__ #define __XFS_LRW_H__ -struct vnode; struct bhv_desc; +struct bhv_vnode; struct xfs_mount; struct xfs_iocore; struct xfs_inode; @@ -49,7 +49,7 @@ struct xfs_iomap; #define XFS_CTRUNC4 14 #define XFS_CTRUNC5 15 #define XFS_CTRUNC6 16 -#define XFS_BUNMAPI 17 +#define XFS_BUNMAP 17 #define XFS_INVAL_CACHED 18 #define XFS_DIORD_ENTER 19 #define XFS_DIOWR_ENTER 20 @@ -82,7 +82,7 @@ extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); -extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, +extern int xfs_zero_eof(struct bhv_vnode *, struct xfs_iocore *, xfs_off_t, xfs_fsize_t, xfs_fsize_t); extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, const struct iovec *, unsigned int, diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 1f0589a05eca..e480b6102051 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -62,7 +62,7 @@ xfs_read_xfsstats( while (j < xstats[i].endpoint) { val = 0; /* sum over all cpus */ - for_each_cpu(c) + for_each_possible_cpu(c) val += *(((__u32*)&per_cpu(xfsstats, c) + j)); len += sprintf(buffer + len, " %u", val); j++; @@ -70,7 +70,7 @@ xfs_read_xfsstats( buffer[len++] = '\n'; } /* extra precision counters */ - for_each_cpu(i) { + for_each_possible_cpu(i) { xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 68f4793e8a11..9bdef9d51900 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -151,7 +149,7 @@ xfs_set_inodeops( STATIC __inline__ void xfs_revalidate_inode( xfs_mount_t *mp, - vnode_t *vp, + bhv_vnode_t *vp, xfs_inode_t *ip) { struct inode *inode = vn_to_inode(vp); @@ -206,7 +204,7 @@ xfs_revalidate_inode( void xfs_initialize_vnode( bhv_desc_t *bdp, - vnode_t *vp, + bhv_vnode_t *vp, bhv_desc_t *inode_bhv, int unlock) { @@ -336,7 +334,7 @@ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - vnode_t *vp; + bhv_vnode_t *vp; vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); if (unlikely(!vp)) @@ -359,13 +357,13 @@ xfs_fs_inode_init_once( { if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) - inode_init_once(vn_to_inode((vnode_t *)vnode)); + inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); } STATIC int xfs_init_zones(void) { - xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t", + xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, xfs_fs_inode_init_once); @@ -409,22 +407,17 @@ xfs_fs_write_inode( struct inode *inode, int sync) { - vnode_t *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); int error = 0, flags = FLUSH_INODE; if (vp) { vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); if (sync) flags |= FLUSH_SYNC; - VOP_IFLUSH(vp, flags, error); - if (error == EAGAIN) { - if (sync) - VOP_IFLUSH(vp, flags | FLUSH_LOG, error); - else - error = 0; - } + error = bhv_vop_iflush(vp, flags); + if (error == EAGAIN) + error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0; } - return -error; } @@ -432,8 +425,7 @@ STATIC void xfs_fs_clear_inode( struct inode *inode) { - vnode_t *vp = vn_from_inode(inode); - int error, cache; + bhv_vnode_t *vp = vn_from_inode(inode); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -446,20 +438,18 @@ xfs_fs_clear_inode( * This can happen because xfs_iget_core calls xfs_idestroy if we * find an inode with di_mode == 0 but without IGET_CREATE set. */ - if (vp->v_fbhv) - VOP_INACTIVE(vp, NULL, cache); + if (VNHEAD(vp)) + bhv_vop_inactive(vp, NULL); VN_LOCK(vp); vp->v_flag &= ~VMODIFIED; VN_UNLOCK(vp, 0); - if (vp->v_fbhv) { - VOP_RECLAIM(vp, error); - if (error) - panic("vn_purge: cannot reclaim"); - } + if (VNHEAD(vp)) + if (bhv_vop_reclaim(vp)) + panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, vp); - ASSERT(vp->v_fbhv == NULL); + ASSERT(VNHEAD(vp) == NULL); #ifdef XFS_VNODE_TRACE ktrace_free(vp->v_trace); @@ -475,13 +465,13 @@ xfs_fs_clear_inode( */ STATIC void xfs_syncd_queue_work( - struct vfs *vfs, + struct bhv_vfs *vfs, void *data, - void (*syncer)(vfs_t *, void *)) + void (*syncer)(bhv_vfs_t *, void *)) { - vfs_sync_work_t *work; + struct bhv_vfs_sync_work *work; - work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP); + work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); INIT_LIST_HEAD(&work->w_list); work->w_syncer = syncer; work->w_data = data; @@ -500,7 +490,7 @@ xfs_syncd_queue_work( */ STATIC void xfs_flush_inode_work( - vfs_t *vfs, + bhv_vfs_t *vfs, void *inode) { filemap_flush(((struct inode *)inode)->i_mapping); @@ -512,7 +502,7 @@ xfs_flush_inode( xfs_inode_t *ip) { struct inode *inode = vn_to_inode(XFS_ITOV(ip)); - struct vfs *vfs = XFS_MTOVFS(ip->i_mount); + struct bhv_vfs *vfs = XFS_MTOVFS(ip->i_mount); igrab(inode); xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); @@ -525,7 +515,7 @@ xfs_flush_inode( */ STATIC void xfs_flush_device_work( - vfs_t *vfs, + bhv_vfs_t *vfs, void *inode) { sync_blockdev(vfs->vfs_super->s_bdev); @@ -537,7 +527,7 @@ xfs_flush_device( xfs_inode_t *ip) { struct inode *inode = vn_to_inode(XFS_ITOV(ip)); - struct vfs *vfs = XFS_MTOVFS(ip->i_mount); + struct bhv_vfs *vfs = XFS_MTOVFS(ip->i_mount); igrab(inode); xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); @@ -545,16 +535,16 @@ xfs_flush_device( xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } -#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE) STATIC void vfs_sync_worker( - vfs_t *vfsp, + bhv_vfs_t *vfsp, void *unused) { int error; if (!(vfsp->vfs_flag & VFS_RDONLY)) - VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); + error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \ + SYNC_ATTR | SYNC_REFCACHE, NULL); vfsp->vfs_sync_seq++; wmb(); wake_up(&vfsp->vfs_wait_single_sync_task); @@ -565,8 +555,8 @@ xfssyncd( void *arg) { long timeleft; - vfs_t *vfsp = (vfs_t *) arg; - struct vfs_sync_work *work, *n; + bhv_vfs_t *vfsp = (bhv_vfs_t *) arg; + bhv_vfs_sync_work_t *work, *n; LIST_HEAD (tmp); timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); @@ -600,7 +590,7 @@ xfssyncd( list_del(&work->w_list); if (work == &vfsp->vfs_sync_work) continue; - kmem_free(work, sizeof(struct vfs_sync_work)); + kmem_free(work, sizeof(struct bhv_vfs_sync_work)); } } @@ -609,7 +599,7 @@ xfssyncd( STATIC int xfs_fs_start_syncd( - vfs_t *vfsp) + bhv_vfs_t *vfsp) { vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; vfsp->vfs_sync_work.w_vfs = vfsp; @@ -621,7 +611,7 @@ xfs_fs_start_syncd( STATIC void xfs_fs_stop_syncd( - vfs_t *vfsp) + bhv_vfs_t *vfsp) { kthread_stop(vfsp->vfs_sync_task); } @@ -630,35 +620,26 @@ STATIC void xfs_fs_put_super( struct super_block *sb) { - vfs_t *vfsp = vfs_from_sb(sb); + bhv_vfs_t *vfsp = vfs_from_sb(sb); int error; xfs_fs_stop_syncd(vfsp); - VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); - if (!error) - VFS_UNMOUNT(vfsp, 0, NULL, error); + bhv_vfs_sync(vfsp, SYNC_ATTR | SYNC_DELWRI, NULL); + error = bhv_vfs_unmount(vfsp, 0, NULL); if (error) { - printk("XFS unmount got error %d\n", error); - printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp); - return; + printk("XFS: unmount got error=%d\n", error); + printk("%s: vfs=0x%p left dangling!\n", __FUNCTION__, vfsp); + } else { + vfs_deallocate(vfsp); } - - vfs_deallocate(vfsp); } STATIC void xfs_fs_write_super( struct super_block *sb) { - vfs_t *vfsp = vfs_from_sb(sb); - int error; - - if (sb->s_flags & MS_RDONLY) { - sb->s_dirt = 0; /* paranoia */ - return; - } - /* Push the log and superblock a little */ - VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error); + if (!(sb->s_flags & MS_RDONLY)) + bhv_vfs_sync(vfs_from_sb(sb), SYNC_FSDATA, NULL); sb->s_dirt = 0; } @@ -667,16 +648,16 @@ xfs_fs_sync_super( struct super_block *sb, int wait) { - vfs_t *vfsp = vfs_from_sb(sb); - int error; - int flags = SYNC_FSDATA; + bhv_vfs_t *vfsp = vfs_from_sb(sb); + int error; + int flags; if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) flags = SYNC_QUIESCE; else flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); - VFS_SYNC(vfsp, flags, NULL, error); + error = bhv_vfs_sync(vfsp, flags, NULL); sb->s_dirt = 0; if (unlikely(laptop_mode)) { @@ -703,14 +684,11 @@ xfs_fs_sync_super( STATIC int xfs_fs_statfs( - struct super_block *sb, + struct dentry *dentry, struct kstatfs *statp) { - vfs_t *vfsp = vfs_from_sb(sb); - int error; - - VFS_STATVFS(vfsp, statp, NULL, error); - return -error; + return -bhv_vfs_statvfs(vfs_from_sb(dentry->d_sb), statp, + vn_from_inode(dentry->d_inode)); } STATIC int @@ -719,13 +697,13 @@ xfs_fs_remount( int *flags, char *options) { - vfs_t *vfsp = vfs_from_sb(sb); + bhv_vfs_t *vfsp = vfs_from_sb(sb); struct xfs_mount_args *args = xfs_args_allocate(sb, 0); int error; - VFS_PARSEARGS(vfsp, options, args, 1, error); + error = bhv_vfs_parseargs(vfsp, options, args, 1); if (!error) - VFS_MNTUPDATE(vfsp, flags, args, error); + error = bhv_vfs_mntupdate(vfsp, flags, args); kmem_free(args, sizeof(*args)); return -error; } @@ -734,7 +712,7 @@ STATIC void xfs_fs_lockfs( struct super_block *sb) { - VFS_FREEZE(vfs_from_sb(sb)); + bhv_vfs_freeze(vfs_from_sb(sb)); } STATIC int @@ -742,11 +720,7 @@ xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) { - struct vfs *vfsp = vfs_from_sb(mnt->mnt_sb); - int error; - - VFS_SHOWARGS(vfsp, m, error); - return error; + return -bhv_vfs_showargs(vfs_from_sb(mnt->mnt_sb), m); } STATIC int @@ -754,11 +728,7 @@ xfs_fs_quotasync( struct super_block *sb, int type) { - struct vfs *vfsp = vfs_from_sb(sb); - int error; - - VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error); - return -error; + return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XQUOTASYNC, 0, NULL); } STATIC int @@ -766,11 +736,7 @@ xfs_fs_getxstate( struct super_block *sb, struct fs_quota_stat *fqs) { - struct vfs *vfsp = vfs_from_sb(sb); - int error; - - VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error); - return -error; + return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XGETQSTAT, 0, (caddr_t)fqs); } STATIC int @@ -779,11 +745,7 @@ xfs_fs_setxstate( unsigned int flags, int op) { - struct vfs *vfsp = vfs_from_sb(sb); - int error; - - VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error); - return -error; + return -bhv_vfs_quotactl(vfs_from_sb(sb), op, 0, (caddr_t)&flags); } STATIC int @@ -793,13 +755,10 @@ xfs_fs_getxquota( qid_t id, struct fs_disk_quota *fdq) { - struct vfs *vfsp = vfs_from_sb(sb); - int error, getmode; - - getmode = (type == USRQUOTA) ? Q_XGETQUOTA : - ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA); - VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error); - return -error; + return -bhv_vfs_quotactl(vfs_from_sb(sb), + (type == USRQUOTA) ? Q_XGETQUOTA : + ((type == GRPQUOTA) ? Q_XGETGQUOTA : + Q_XGETPQUOTA), id, (caddr_t)fdq); } STATIC int @@ -809,13 +768,10 @@ xfs_fs_setxquota( qid_t id, struct fs_disk_quota *fdq) { - struct vfs *vfsp = vfs_from_sb(sb); - int error, setmode; - - setmode = (type == USRQUOTA) ? Q_XSETQLIM : - ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM); - VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error); - return -error; + return -bhv_vfs_quotactl(vfs_from_sb(sb), + (type == USRQUOTA) ? Q_XSETQLIM : + ((type == GRPQUOTA) ? Q_XSETGQLIM : + Q_XSETPQLIM), id, (caddr_t)fdq); } STATIC int @@ -824,34 +780,32 @@ xfs_fs_fill_super( void *data, int silent) { - vnode_t *rootvp; - struct vfs *vfsp = vfs_allocate(sb); + struct bhv_vnode *rootvp; + struct bhv_vfs *vfsp = vfs_allocate(sb); struct xfs_mount_args *args = xfs_args_allocate(sb, silent); struct kstatfs statvfs; - int error, error2; + int error; bhv_insert_all_vfsops(vfsp); - VFS_PARSEARGS(vfsp, (char *)data, args, 0, error); + error = bhv_vfs_parseargs(vfsp, (char *)data, args, 0); if (error) { bhv_remove_all_vfsops(vfsp, 1); goto fail_vfsop; } sb_min_blocksize(sb, BBSIZE); -#ifdef CONFIG_XFS_EXPORT sb->s_export_op = &xfs_export_operations; -#endif sb->s_qcop = &xfs_quotactl_operations; sb->s_op = &xfs_super_operations; - VFS_MOUNT(vfsp, args, NULL, error); + error = bhv_vfs_mount(vfsp, args, NULL); if (error) { bhv_remove_all_vfsops(vfsp, 1); goto fail_vfsop; } - VFS_STATVFS(vfsp, &statvfs, NULL, error); + error = bhv_vfs_statvfs(vfsp, &statvfs, NULL); if (error) goto fail_unmount; @@ -863,7 +817,7 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - VFS_ROOT(vfsp, &rootvp, error); + error = bhv_vfs_root(vfsp, &rootvp); if (error) goto fail_unmount; @@ -892,7 +846,7 @@ fail_vnrele: } fail_unmount: - VFS_UNMOUNT(vfsp, 0, NULL, error2); + bhv_vfs_unmount(vfsp, 0, NULL); fail_vfsop: vfs_deallocate(vfsp); @@ -900,14 +854,16 @@ fail_vfsop: return -error; } -STATIC struct super_block * +STATIC int xfs_fs_get_sb( struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, + mnt); } STATIC struct super_operations xfs_super_operations = { diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 376b96cb513a..33dd1ca13245 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -105,7 +105,7 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); -extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int); +extern void xfs_initialize_vnode(bhv_desc_t *, bhv_vnode_t *, bhv_desc_t *, int); extern void xfs_flush_inode(struct xfs_inode *); extern void xfs_flush_device(struct xfs_inode *); diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7079cc837210..af246532fbfb 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -38,7 +38,7 @@ xfs_stats_clear_proc_handler( if (!ret && write && *valp) { printk("XFS Clearing xfsstats\n"); - for_each_cpu(c) { + for_each_possible_cpu(c) { preempt_disable(); /* save vn_active, it's a universal truth! */ vn_active = per_cpu(xfsstats, c).vn_active; @@ -120,6 +120,11 @@ STATIC ctl_table xfs_table[] = { &sysctl_intvec, NULL, &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, + {XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max}, + /* please keep this the last entry */ #ifdef CONFIG_PROC_FS {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index bc8c11f13722..a631fb8cc5ac 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -46,6 +46,7 @@ typedef struct xfs_param { xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ + xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ } xfs_param_t; /* @@ -84,6 +85,7 @@ enum { /* XFS_IO_BYPASS = 18 */ XFS_INHERIT_NOSYM = 19, XFS_ROTORSTEP = 20, + XFS_INHERIT_NODFRG = 21, }; extern xfs_param_t xfs_params; diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index 6f7c9f7a8624..6145e8bd0be2 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_imap.h" #include "xfs_alloc.h" @@ -104,7 +103,7 @@ vfs_mntupdate( int vfs_root( struct bhv_desc *bdp, - struct vnode **vpp) + struct bhv_vnode **vpp) { struct bhv_desc *next = bdp; @@ -117,15 +116,15 @@ vfs_root( int vfs_statvfs( struct bhv_desc *bdp, - xfs_statfs_t *sp, - struct vnode *vp) + bhv_statvfs_t *statp, + struct bhv_vnode *vp) { struct bhv_desc *next = bdp; ASSERT(next); while (! (bhvtovfsops(next))->vfs_statvfs) next = BHV_NEXT(next); - return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp)); + return ((*bhvtovfsops(next)->vfs_statvfs)(next, statp, vp)); } int @@ -145,7 +144,7 @@ vfs_sync( int vfs_vget( struct bhv_desc *bdp, - struct vnode **vpp, + struct bhv_vnode **vpp, struct fid *fidp) { struct bhv_desc *next = bdp; @@ -187,7 +186,7 @@ vfs_quotactl( void vfs_init_vnode( struct bhv_desc *bdp, - struct vnode *vp, + struct bhv_vnode *vp, struct bhv_desc *bp, int unlock) { @@ -226,13 +225,13 @@ vfs_freeze( ((*bhvtovfsops(next)->vfs_freeze)(next)); } -vfs_t * +bhv_vfs_t * vfs_allocate( struct super_block *sb) { - struct vfs *vfsp; + struct bhv_vfs *vfsp; - vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); + vfsp = kmem_zalloc(sizeof(bhv_vfs_t), KM_SLEEP); bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); INIT_LIST_HEAD(&vfsp->vfs_sync_list); spin_lock_init(&vfsp->vfs_sync_lock); @@ -247,25 +246,25 @@ vfs_allocate( return vfsp; } -vfs_t * +bhv_vfs_t * vfs_from_sb( struct super_block *sb) { - return (vfs_t *)sb->s_fs_info; + return (bhv_vfs_t *)sb->s_fs_info; } void vfs_deallocate( - struct vfs *vfsp) + struct bhv_vfs *vfsp) { bhv_head_destroy(VFS_BHVHEAD(vfsp)); - kmem_free(vfsp, sizeof(vfs_t)); + kmem_free(vfsp, sizeof(bhv_vfs_t)); } void vfs_insertops( - struct vfs *vfsp, - struct bhv_vfsops *vfsops) + struct bhv_vfs *vfsp, + struct bhv_module_vfsops *vfsops) { struct bhv_desc *bdp; @@ -276,9 +275,9 @@ vfs_insertops( void vfs_insertbhv( - struct vfs *vfsp, + struct bhv_vfs *vfsp, struct bhv_desc *bdp, - struct vfsops *vfsops, + struct bhv_vfsops *vfsops, void *mount) { bhv_desc_init(bdp, mount, vfsp, vfsops); @@ -287,7 +286,7 @@ vfs_insertbhv( void bhv_remove_vfsops( - struct vfs *vfsp, + struct bhv_vfs *vfsp, int pos) { struct bhv_desc *bhv; @@ -301,7 +300,7 @@ bhv_remove_vfsops( void bhv_remove_all_vfsops( - struct vfs *vfsp, + struct bhv_vfs *vfsp, int freebase) { struct xfs_mount *mp; @@ -317,7 +316,7 @@ bhv_remove_all_vfsops( void bhv_insert_all_vfsops( - struct vfs *vfsp) + struct bhv_vfs *vfsp) { struct xfs_mount *mp; diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 841200c03092..91fc2c4b3353 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -21,42 +21,40 @@ #include <linux/vfs.h> #include "xfs_fs.h" +struct bhv_vfs; +struct bhv_vnode; + struct fid; -struct vfs; struct cred; -struct vnode; -struct kstatfs; struct seq_file; struct super_block; struct xfs_mount_args; -typedef struct kstatfs xfs_statfs_t; +typedef struct kstatfs bhv_statvfs_t; -typedef struct vfs_sync_work { +typedef struct bhv_vfs_sync_work { struct list_head w_list; - struct vfs *w_vfs; + struct bhv_vfs *w_vfs; void *w_data; /* syncer routine argument */ - void (*w_syncer)(struct vfs *, void *); -} vfs_sync_work_t; + void (*w_syncer)(struct bhv_vfs *, void *); +} bhv_vfs_sync_work_t; -typedef struct vfs { +typedef struct bhv_vfs { u_int vfs_flag; /* flags */ xfs_fsid_t vfs_fsid; /* file system ID */ xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */ bhv_head_t vfs_bh; /* head of vfs behavior chain */ struct super_block *vfs_super; /* generic superblock pointer */ struct task_struct *vfs_sync_task; /* generalised sync thread */ - vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */ + bhv_vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */ struct list_head vfs_sync_list; /* sync thread work item list */ spinlock_t vfs_sync_lock; /* work item list lock */ - int vfs_sync_seq; /* sync thread generation no. */ + int vfs_sync_seq; /* sync thread generation no. */ wait_queue_head_t vfs_wait_single_sync_task; -} vfs_t; - -#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ +} bhv_vfs_t; -#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) ) -#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) ) +#define bhvtovfs(bdp) ( (struct bhv_vfs *)BHV_VOBJ(bdp) ) +#define bhvtovfsops(bdp) ( (struct bhv_vfsops *)BHV_OPS(bdp) ) #define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh ) #define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) ) @@ -71,7 +69,7 @@ typedef enum { VFS_BHV_QM, /* quota manager */ VFS_BHV_IO, /* IO path */ VFS_BHV_END /* housekeeping end-of-range */ -} vfs_bhv_t; +} bhv_vfs_type_t; #define VFS_POSITION_XFS (BHV_POSITION_BASE) #define VFS_POSITION_DM (VFS_POSITION_BASE+10) @@ -81,8 +79,9 @@ typedef enum { #define VFS_RDONLY 0x0001 /* read-only vfs */ #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ -#define VFS_32BITINODES 0x0008 /* do not use inums above 32 bits */ -#define VFS_END 0x0008 /* max flag */ +#define VFS_UMOUNT 0x0008 /* unmount in progress */ +#define VFS_32BITINODES 0x0010 /* do not use inums above 32 bits */ +#define VFS_END 0x0010 /* max flag */ #define SYNC_ATTR 0x0001 /* sync attributes */ #define SYNC_CLOSE 0x0002 /* close file system down */ @@ -92,7 +91,14 @@ typedef enum { #define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_QUIESCE 0x0100 /* quiesce filesystem for a snapshot */ +#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */ + +#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ +#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ +#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ +#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ +#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ +#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ typedef int (*vfs_mount_t)(bhv_desc_t *, struct xfs_mount_args *, struct cred *); @@ -102,18 +108,19 @@ typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *); typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *); typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *, struct xfs_mount_args *); -typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **); -typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *); +typedef int (*vfs_root_t)(bhv_desc_t *, struct bhv_vnode **); +typedef int (*vfs_statvfs_t)(bhv_desc_t *, bhv_statvfs_t *, + struct bhv_vnode *); typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *); -typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *); +typedef int (*vfs_vget_t)(bhv_desc_t *, struct bhv_vnode **, struct fid *); typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t); typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t); typedef void (*vfs_init_vnode_t)(bhv_desc_t *, - struct vnode *, bhv_desc_t *, int); + struct bhv_vnode *, bhv_desc_t *, int); typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); typedef void (*vfs_freeze_t)(bhv_desc_t *); -typedef struct vfsops { +typedef struct bhv_vfsops { bhv_position_t vf_position; /* behavior chain position */ vfs_mount_t vfs_mount; /* mount file system */ vfs_parseargs_t vfs_parseargs; /* parse mount options */ @@ -129,82 +136,82 @@ typedef struct vfsops { vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */ vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */ vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */ -} vfsops_t; +} bhv_vfsops_t; /* - * VFS's. Operates on vfs structure pointers (starts at bhv head). + * Virtual filesystem operations, operating from head bhv. */ -#define VHEAD(v) ((v)->vfs_fbhv) -#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr)) -#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f)) -#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m)) -#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr)) -#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args)) -#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp)) -#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp)) -#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr)) -#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp)) -#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p)) -#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p)) -#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) ) -#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) ) -#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) ) +#define VFSHEAD(v) ((v)->vfs_bh.bh_first) +#define bhv_vfs_mount(v, ma,cr) vfs_mount(VFSHEAD(v), ma,cr) +#define bhv_vfs_parseargs(v, o,ma,f) vfs_parseargs(VFSHEAD(v), o,ma,f) +#define bhv_vfs_showargs(v, m) vfs_showargs(VFSHEAD(v), m) +#define bhv_vfs_unmount(v, f,cr) vfs_unmount(VFSHEAD(v), f,cr) +#define bhv_vfs_mntupdate(v, fl,args) vfs_mntupdate(VFSHEAD(v), fl,args) +#define bhv_vfs_root(v, vpp) vfs_root(VFSHEAD(v), vpp) +#define bhv_vfs_statvfs(v, sp,vp) vfs_statvfs(VFSHEAD(v), sp,vp) +#define bhv_vfs_sync(v, flag,cr) vfs_sync(VFSHEAD(v), flag,cr) +#define bhv_vfs_vget(v, vpp,fidp) vfs_vget(VFSHEAD(v), vpp,fidp) +#define bhv_vfs_dmapiops(v, p) vfs_dmapiops(VFSHEAD(v), p) +#define bhv_vfs_quotactl(v, c,id,p) vfs_quotactl(VFSHEAD(v), c,id,p) +#define bhv_vfs_init_vnode(v, vp,b,ul) vfs_init_vnode(VFSHEAD(v), vp,b,ul) +#define bhv_vfs_force_shutdown(v,u,f,l) vfs_force_shutdown(VFSHEAD(v), u,f,l) +#define bhv_vfs_freeze(v) vfs_freeze(VFSHEAD(v)) /* - * PVFS's. Operates on behavior descriptor pointers. + * Virtual filesystem operations, operating from next bhv. */ -#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr)) -#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f)) -#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m)) -#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr)) -#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args)) -#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp)) -#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp)) -#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr)) -#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp)) -#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p)) -#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p)) -#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) ) -#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) ) -#define PVFS_FREEZE(b) ( vfs_freeze(b) ) +#define bhv_next_vfs_mount(b, ma,cr) vfs_mount(b, ma,cr) +#define bhv_next_vfs_parseargs(b, o,ma,f) vfs_parseargs(b, o,ma,f) +#define bhv_next_vfs_showargs(b, m) vfs_showargs(b, m) +#define bhv_next_vfs_unmount(b, f,cr) vfs_unmount(b, f,cr) +#define bhv_next_vfs_mntupdate(b, fl,args) vfs_mntupdate(b, fl, args) +#define bhv_next_vfs_root(b, vpp) vfs_root(b, vpp) +#define bhv_next_vfs_statvfs(b, sp,vp) vfs_statvfs(b, sp,vp) +#define bhv_next_vfs_sync(b, flag,cr) vfs_sync(b, flag,cr) +#define bhv_next_vfs_vget(b, vpp,fidp) vfs_vget(b, vpp,fidp) +#define bhv_next_vfs_dmapiops(b, p) vfs_dmapiops(b, p) +#define bhv_next_vfs_quotactl(b, c,id,p) vfs_quotactl(b, c,id,p) +#define bhv_next_vfs_init_vnode(b, vp,b2,ul) vfs_init_vnode(b, vp,b2,ul) +#define bhv_next_force_shutdown(b, fl,f,l) vfs_force_shutdown(b, fl,f,l) +#define bhv_next_vfs_freeze(b) vfs_freeze(b) extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); extern int vfs_showargs(bhv_desc_t *, struct seq_file *); extern int vfs_unmount(bhv_desc_t *, int, struct cred *); extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *); -extern int vfs_root(bhv_desc_t *, struct vnode **); -extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *); +extern int vfs_root(bhv_desc_t *, struct bhv_vnode **); +extern int vfs_statvfs(bhv_desc_t *, bhv_statvfs_t *, struct bhv_vnode *); extern int vfs_sync(bhv_desc_t *, int, struct cred *); -extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *); +extern int vfs_vget(bhv_desc_t *, struct bhv_vnode **, struct fid *); extern int vfs_dmapiops(bhv_desc_t *, caddr_t); extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t); -extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); +extern void vfs_init_vnode(bhv_desc_t *, struct bhv_vnode *, bhv_desc_t *, int); extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int); extern void vfs_freeze(bhv_desc_t *); -typedef struct bhv_vfsops { - struct vfsops bhv_common; +#define vfs_test_for_freeze(vfs) ((vfs)->vfs_super->s_frozen) +#define vfs_wait_for_freeze(vfs,l) vfs_check_frozen((vfs)->vfs_super, (l)) + +typedef struct bhv_module_vfsops { + struct bhv_vfsops bhv_common; void * bhv_custom; -} bhv_vfsops_t; +} bhv_module_vfsops_t; -#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) ) -#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom ) -#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) -#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) +#define vfs_bhv_lookup(v, id) (bhv_lookup_range(&(v)->vfs_bh, (id), (id))) +#define vfs_bhv_custom(b) (((bhv_module_vfsops_t*)BHV_OPS(b))->bhv_custom) +#define vfs_bhv_set_custom(b,o) ((b)->bhv_custom = (void *)(o)) +#define vfs_bhv_clr_custom(b) ((b)->bhv_custom = NULL) -extern vfs_t *vfs_allocate(struct super_block *); -extern vfs_t *vfs_from_sb(struct super_block *); -extern void vfs_deallocate(vfs_t *); -extern void vfs_insertops(vfs_t *, bhv_vfsops_t *); -extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *); +extern bhv_vfs_t *vfs_allocate(struct super_block *); +extern bhv_vfs_t *vfs_from_sb(struct super_block *); +extern void vfs_deallocate(bhv_vfs_t *); +extern void vfs_insertbhv(bhv_vfs_t *, bhv_desc_t *, bhv_vfsops_t *, void *); -extern void bhv_insert_all_vfsops(struct vfs *); -extern void bhv_remove_all_vfsops(struct vfs *, int); -extern void bhv_remove_vfsops(struct vfs *, int); +extern void vfs_insertops(bhv_vfs_t *, bhv_module_vfsops_t *); -#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen) -#define fs_check_frozen(vfsp, level) \ - vfs_check_frozen(vfsp->vfs_super, level); +extern void bhv_insert_all_vfsops(struct bhv_vfs *); +extern void bhv_remove_all_vfsops(struct bhv_vfs *, int); +extern void bhv_remove_vfsops(struct bhv_vfs *, int); #endif /* __XFS_VFS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index d27c25b27ccd..6628d96b6fd6 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -39,7 +39,7 @@ vn_init(void) void vn_iowait( - struct vnode *vp) + bhv_vnode_t *vp) { wait_queue_head_t *wq = vptosync(vp); @@ -48,17 +48,33 @@ vn_iowait( void vn_iowake( - struct vnode *vp) + bhv_vnode_t *vp) { if (atomic_dec_and_test(&vp->v_iocount)) wake_up(vptosync(vp)); } -struct vnode * +/* + * Volume managers supporting multiple paths can send back ENODEV when the + * final path disappears. In this case continuing to fill the page cache + * with dirty data which cannot be written out is evil, so prevent that. + */ +void +vn_ioerror( + bhv_vnode_t *vp, + int error, + char *f, + int l) +{ + if (unlikely(error == -ENODEV)) + bhv_vfs_force_shutdown(vp->v_vfsp, SHUTDOWN_DEVICE_REQ, f, l); +} + +bhv_vnode_t * vn_initialize( struct inode *inode) { - struct vnode *vp = vn_from_inode(inode); + bhv_vnode_t *vp = vn_from_inode(inode); XFS_STATS_INC(vn_active); XFS_STATS_INC(vn_alloc); @@ -94,8 +110,8 @@ vn_initialize( */ void vn_revalidate_core( - struct vnode *vp, - vattr_t *vap) + bhv_vnode_t *vp, + bhv_vattr_t *vap) { struct inode *inode = vn_to_inode(vp); @@ -130,14 +146,14 @@ vn_revalidate_core( */ int __vn_revalidate( - struct vnode *vp, - struct vattr *vattr) + bhv_vnode_t *vp, + bhv_vattr_t *vattr) { int error; vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS; - VOP_GETATTR(vp, vattr, 0, NULL, error); + error = bhv_vop_getattr(vp, vattr, 0, NULL); if (likely(!error)) { vn_revalidate_core(vp, vattr); VUNMODIFY(vp); @@ -147,9 +163,9 @@ __vn_revalidate( int vn_revalidate( - struct vnode *vp) + bhv_vnode_t *vp) { - vattr_t vattr; + bhv_vattr_t vattr; return __vn_revalidate(vp, &vattr); } @@ -157,9 +173,9 @@ vn_revalidate( /* * Add a reference to a referenced vnode. */ -struct vnode * +bhv_vnode_t * vn_hold( - struct vnode *vp) + bhv_vnode_t *vp) { struct inode *inode; @@ -192,31 +208,31 @@ vn_hold( * Vnode tracing code. */ void -vn_trace_entry(vnode_t *vp, const char *func, inst_t *ra) +vn_trace_entry(bhv_vnode_t *vp, const char *func, inst_t *ra) { KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra); } void -vn_trace_exit(vnode_t *vp, const char *func, inst_t *ra) +vn_trace_exit(bhv_vnode_t *vp, const char *func, inst_t *ra) { KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra); } void -vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra) +vn_trace_hold(bhv_vnode_t *vp, char *file, int line, inst_t *ra) { KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra); } void -vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra) +vn_trace_ref(bhv_vnode_t *vp, char *file, int line, inst_t *ra) { KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra); } void -vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra) +vn_trace_rele(bhv_vnode_t *vp, char *file, int line, inst_t *ra) { KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 2a8e16c22353..35c6a01963a7 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -14,57 +14,35 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Portions Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ #ifndef __XFS_VNODE_H__ #define __XFS_VNODE_H__ struct uio; struct file; -struct vattr; +struct bhv_vfs; +struct bhv_vattr; struct xfs_iomap; struct attrlist_cursor_kern; +typedef struct dentry bhv_vname_t; +typedef __u64 bhv_vnumber_t; -typedef xfs_ino_t vnumber_t; -typedef struct dentry vname_t; -typedef bhv_head_t vn_bhv_head_t; +typedef enum bhv_vflags { + VMODIFIED = 0x08, /* XFS inode state possibly differs */ + /* to the Linux inode state. */ + VTRUNCATED = 0x40, /* truncated down so flush-on-close */ +} bhv_vflags_t; /* * MP locking protocols: * v_flag, v_vfsp VN_LOCK/VN_UNLOCK */ -typedef struct vnode { - __u32 v_flag; /* vnode flags (see below) */ - struct vfs *v_vfsp; /* ptr to containing VFS */ - vnumber_t v_number; /* in-core vnode number */ - vn_bhv_head_t v_bh; /* behavior head */ +typedef struct bhv_vnode { + bhv_vflags_t v_flag; /* vnode flags (see above) */ + bhv_vfs_t *v_vfsp; /* ptr to containing VFS */ + bhv_vnumber_t v_number; /* in-core vnode number */ + bhv_head_t v_bh; /* behavior head */ spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ atomic_t v_iocount; /* outstanding I/O count */ #ifdef XFS_VNODE_TRACE @@ -72,7 +50,7 @@ typedef struct vnode { #endif struct inode v_inode; /* Linux inode */ /* inode MUST be last */ -} vnode_t; +} bhv_vnode_t; #define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode) #define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode) @@ -80,9 +58,6 @@ typedef struct vnode { #define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode) #define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode) -#define v_fbhv v_bh.bh_first /* first behavior */ -#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ - #define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ #define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */ #define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ @@ -104,8 +79,8 @@ typedef enum { /* * Macros for dealing with the behavior descriptor inside of the vnode. */ -#define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp)) -#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp)) +#define BHV_TO_VNODE(bdp) ((bhv_vnode_t *)BHV_VOBJ(bdp)) +#define BHV_TO_VNODE_NULL(bdp) ((bhv_vnode_t *)BHV_VOBJNULL(bdp)) #define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) #define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) @@ -116,35 +91,29 @@ typedef enum { /* * Vnode to Linux inode mapping. */ -static inline struct vnode *vn_from_inode(struct inode *inode) +static inline struct bhv_vnode *vn_from_inode(struct inode *inode) { - return (vnode_t *)list_entry(inode, vnode_t, v_inode); + return (bhv_vnode_t *)list_entry(inode, bhv_vnode_t, v_inode); } -static inline struct inode *vn_to_inode(struct vnode *vnode) +static inline struct inode *vn_to_inode(struct bhv_vnode *vnode) { return &vnode->v_inode; } /* - * Vnode flags. - */ -#define VMODIFIED 0x8 /* XFS inode state possibly differs */ - /* to the Linux inode state. */ - -/* - * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter. + * Values for the vop_rwlock/rwunlock flags parameter. */ -typedef enum vrwlock { +typedef enum bhv_vrwlock { VRWLOCK_NONE, VRWLOCK_READ, VRWLOCK_WRITE, VRWLOCK_WRITE_DIRECT, VRWLOCK_TRY_READ, VRWLOCK_TRY_WRITE -} vrwlock_t; +} bhv_vrwlock_t; /* - * Return values for VOP_INACTIVE. A return value of + * Return values for bhv_vop_inactive. A return value of * VN_INACTIVE_NOCACHE implies that the file system behavior * has disassociated its state and bhv_desc_t from the vnode. */ @@ -152,18 +121,20 @@ typedef enum vrwlock { #define VN_INACTIVE_NOCACHE 1 /* - * Values for the cmd code given to VOP_VNODE_CHANGE. + * Values for the cmd code given to vop_vnode_change. */ -typedef enum vchange { +typedef enum bhv_vchange { VCHANGE_FLAGS_FRLOCKS = 0, VCHANGE_FLAGS_ENF_LOCKING = 1, VCHANGE_FLAGS_TRUNCATED = 2, VCHANGE_FLAGS_PAGE_DIRTY = 3, VCHANGE_FLAGS_IOEXCL_COUNT = 4 -} vchange_t; +} bhv_vchange_t; +typedef enum { L_FALSE, L_TRUE } lastclose_t; typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); +typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *); typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); @@ -181,27 +152,27 @@ typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *, struct cred *); typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, int, unsigned int, void __user *); -typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, +typedef int (*vop_getattr_t)(bhv_desc_t *, struct bhv_vattr *, int, struct cred *); -typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int, +typedef int (*vop_setattr_t)(bhv_desc_t *, struct bhv_vattr *, int, struct cred *); typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *); -typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **, - int, vnode_t *, struct cred *); -typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *, - vnode_t **, struct cred *); -typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *); -typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *, - struct cred *); -typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *, +typedef int (*vop_lookup_t)(bhv_desc_t *, bhv_vname_t *, bhv_vnode_t **, + int, bhv_vnode_t *, struct cred *); +typedef int (*vop_create_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr *, + bhv_vnode_t **, struct cred *); +typedef int (*vop_remove_t)(bhv_desc_t *, bhv_vname_t *, struct cred *); +typedef int (*vop_link_t)(bhv_desc_t *, bhv_vnode_t *, bhv_vname_t *, struct cred *); -typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *, - vnode_t **, struct cred *); -typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *); +typedef int (*vop_rename_t)(bhv_desc_t *, bhv_vname_t *, bhv_vnode_t *, + bhv_vname_t *, struct cred *); +typedef int (*vop_mkdir_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr *, + bhv_vnode_t **, struct cred *); +typedef int (*vop_rmdir_t)(bhv_desc_t *, bhv_vname_t *, struct cred *); typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *, int *); -typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *, - char *, vnode_t **, struct cred *); +typedef int (*vop_symlink_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr*, + char *, bhv_vnode_t **, struct cred *); typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int, struct cred *); typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *, @@ -209,8 +180,8 @@ typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *, typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *); typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *); typedef int (*vop_release_t)(bhv_desc_t *); -typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t); -typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t); +typedef int (*vop_rwlock_t)(bhv_desc_t *, bhv_vrwlock_t); +typedef void (*vop_rwunlock_t)(bhv_desc_t *, bhv_vrwlock_t); typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, struct xfs_iomap *, int *); typedef int (*vop_reclaim_t)(bhv_desc_t *); @@ -222,8 +193,8 @@ typedef int (*vop_attr_remove_t)(bhv_desc_t *, const char *, int, struct cred *); typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int, struct attrlist_cursor_kern *, struct cred *); -typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int); -typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t); +typedef void (*vop_link_removed_t)(bhv_desc_t *, bhv_vnode_t *, int); +typedef void (*vop_vnode_change_t)(bhv_desc_t *, bhv_vchange_t, __psint_t); typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, @@ -231,9 +202,10 @@ typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, typedef int (*vop_iflush_t)(bhv_desc_t *, int); -typedef struct vnodeops { +typedef struct bhv_vnodeops { bhv_position_t vn_position; /* position within behavior chain */ vop_open_t vop_open; + vop_close_t vop_close; vop_read_t vop_read; vop_write_t vop_write; vop_sendfile_t vop_sendfile; @@ -271,103 +243,80 @@ typedef struct vnodeops { vop_pflushvp_t vop_flush_pages; vop_release_t vop_release; vop_iflush_t vop_iflush; -} vnodeops_t; +} bhv_vnodeops_t; /* - * VOP's. - */ -#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op) - -#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \ - rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) -#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \ - rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) -#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ - rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) -#define VOP_SPLICE_READ(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \ - rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr) -#define VOP_SPLICE_WRITE(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \ - rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr) -#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ - rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) -#define VOP_OPEN(vp, cr, rv) \ - rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr) -#define VOP_GETATTR(vp, vap, f, cr, rv) \ - rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr) -#define VOP_SETATTR(vp, vap, f, cr, rv) \ - rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr) -#define VOP_ACCESS(vp, mode, cr, rv) \ - rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr) -#define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \ - rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr) -#define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \ - rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr) -#define VOP_REMOVE(dvp,d,cr,rv) \ - rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr) -#define VOP_LINK(tdvp,fvp,d,cr,rv) \ - rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr) -#define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \ - rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr) -#define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \ - rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr) -#define VOP_RMDIR(dp,d,cr,rv) \ - rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr) -#define VOP_READDIR(vp,uiop,cr,eofp,rv) \ - rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp) -#define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \ - rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr) -#define VOP_READLINK(vp,uiop,fl,cr,rv) \ - rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr) -#define VOP_FSYNC(vp,f,cr,b,e,rv) \ - rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e) -#define VOP_INACTIVE(vp, cr, rv) \ - rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr) -#define VOP_RELEASE(vp, rv) \ - rv = _VOP_(vop_release, vp)((vp)->v_fbhv) -#define VOP_FID2(vp, fidp, rv) \ - rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp) -#define VOP_RWLOCK(vp,i) \ - (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) -#define VOP_RWLOCK_TRY(vp,i) \ - _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) -#define VOP_RWUNLOCK(vp,i) \ - (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i) -#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \ - rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr) -#define VOP_RECLAIM(vp, rv) \ - rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv) -#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \ - rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred) -#define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \ - rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred) -#define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \ - rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred) -#define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \ - rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred) -#define VOP_LINK_REMOVED(vp, dvp, linkzero) \ - (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero) -#define VOP_VNODE_CHANGE(vp, cmd, val) \ - (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val) -/* - * These are page cache functions that now go thru VOPs. - * 'last' parameter is unused and left in for IRIX compatibility + * Virtual node operations, operating from head bhv. */ -#define VOP_TOSS_PAGES(vp, first, last, fiopt) \ - _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt) -/* - * 'last' parameter is unused and left in for IRIX compatibility - */ -#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \ - _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt) -/* - * 'last' parameter is unused and left in for IRIX compatibility - */ -#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \ - rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt) -#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \ - rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg) -#define VOP_IFLUSH(vp, flags, rv) \ - rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags) +#define VNHEAD(vp) ((vp)->v_bh.bh_first) +#define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op) +#define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr) +#define bhv_vop_close(vp, f,last,cr) VOP(vop_close, vp)(VNHEAD(vp),f,last,cr) +#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \ + VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) +#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \ + VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) +#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \ + VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr) +#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \ + VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr) +#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \ + VOP(vop_splice_write, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr) +#define bhv_vop_bmap(vp,of,sz,rw,b,n) \ + VOP(vop_bmap, vp)(VNHEAD(vp),of,sz,rw,b,n) +#define bhv_vop_getattr(vp, vap,f,cr) \ + VOP(vop_getattr, vp)(VNHEAD(vp), vap,f,cr) +#define bhv_vop_setattr(vp, vap,f,cr) \ + VOP(vop_setattr, vp)(VNHEAD(vp), vap,f,cr) +#define bhv_vop_access(vp, mode,cr) VOP(vop_access, vp)(VNHEAD(vp), mode,cr) +#define bhv_vop_lookup(vp,d,vpp,f,rdir,cr) \ + VOP(vop_lookup, vp)(VNHEAD(vp),d,vpp,f,rdir,cr) +#define bhv_vop_create(dvp,d,vap,vpp,cr) \ + VOP(vop_create, dvp)(VNHEAD(dvp),d,vap,vpp,cr) +#define bhv_vop_remove(dvp,d,cr) VOP(vop_remove, dvp)(VNHEAD(dvp),d,cr) +#define bhv_vop_link(dvp,fvp,d,cr) VOP(vop_link, dvp)(VNHEAD(dvp),fvp,d,cr) +#define bhv_vop_rename(fvp,fnm,tdvp,tnm,cr) \ + VOP(vop_rename, fvp)(VNHEAD(fvp),fnm,tdvp,tnm,cr) +#define bhv_vop_mkdir(dp,d,vap,vpp,cr) \ + VOP(vop_mkdir, dp)(VNHEAD(dp),d,vap,vpp,cr) +#define bhv_vop_rmdir(dp,d,cr) VOP(vop_rmdir, dp)(VNHEAD(dp),d,cr) +#define bhv_vop_readdir(vp,uiop,cr,eofp) \ + VOP(vop_readdir, vp)(VNHEAD(vp),uiop,cr,eofp) +#define bhv_vop_symlink(dvp,d,vap,tnm,vpp,cr) \ + VOP(vop_symlink, dvp)(VNHEAD(dvp),d,vap,tnm,vpp,cr) +#define bhv_vop_readlink(vp,uiop,fl,cr) \ + VOP(vop_readlink, vp)(VNHEAD(vp),uiop,fl,cr) +#define bhv_vop_fsync(vp,f,cr,b,e) VOP(vop_fsync, vp)(VNHEAD(vp),f,cr,b,e) +#define bhv_vop_inactive(vp,cr) VOP(vop_inactive, vp)(VNHEAD(vp),cr) +#define bhv_vop_release(vp) VOP(vop_release, vp)(VNHEAD(vp)) +#define bhv_vop_fid2(vp,fidp) VOP(vop_fid2, vp)(VNHEAD(vp),fidp) +#define bhv_vop_rwlock(vp,i) VOP(vop_rwlock, vp)(VNHEAD(vp),i) +#define bhv_vop_rwlock_try(vp,i) VOP(vop_rwlock, vp)(VNHEAD(vp),i) +#define bhv_vop_rwunlock(vp,i) VOP(vop_rwunlock, vp)(VNHEAD(vp),i) +#define bhv_vop_frlock(vp,c,fl,flags,offset,fr) \ + VOP(vop_frlock, vp)(VNHEAD(vp),c,fl,flags,offset,fr) +#define bhv_vop_reclaim(vp) VOP(vop_reclaim, vp)(VNHEAD(vp)) +#define bhv_vop_attr_get(vp, name, val, vallenp, fl, cred) \ + VOP(vop_attr_get, vp)(VNHEAD(vp),name,val,vallenp,fl,cred) +#define bhv_vop_attr_set(vp, name, val, vallen, fl, cred) \ + VOP(vop_attr_set, vp)(VNHEAD(vp),name,val,vallen,fl,cred) +#define bhv_vop_attr_remove(vp, name, flags, cred) \ + VOP(vop_attr_remove, vp)(VNHEAD(vp),name,flags,cred) +#define bhv_vop_attr_list(vp, buf, buflen, fl, cursor, cred) \ + VOP(vop_attr_list, vp)(VNHEAD(vp),buf,buflen,fl,cursor,cred) +#define bhv_vop_link_removed(vp, dvp, linkzero) \ + VOP(vop_link_removed, vp)(VNHEAD(vp), dvp, linkzero) +#define bhv_vop_vnode_change(vp, cmd, val) \ + VOP(vop_vnode_change, vp)(VNHEAD(vp), cmd, val) +#define bhv_vop_toss_pages(vp, first, last, fiopt) \ + VOP(vop_tosspages, vp)(VNHEAD(vp), first, last, fiopt) +#define bhv_vop_flushinval_pages(vp, first, last, fiopt) \ + VOP(vop_flushinval_pages, vp)(VNHEAD(vp),first,last,fiopt) +#define bhv_vop_flush_pages(vp, first, last, flags, fiopt) \ + VOP(vop_flush_pages, vp)(VNHEAD(vp),first,last,flags,fiopt) +#define bhv_vop_ioctl(vp, inode, filp, fl, cmd, arg) \ + VOP(vop_ioctl, vp)(VNHEAD(vp),inode,filp,fl,cmd,arg) +#define bhv_vop_iflush(vp, flags) VOP(vop_iflush, vp)(VNHEAD(vp), flags) /* * Flags for read/write calls - same values as IRIX @@ -377,7 +326,7 @@ typedef struct vnodeops { #define IO_INVIS 0x00020 /* don't update inode timestamps */ /* - * Flags for VOP_IFLUSH call + * Flags for vop_iflush call */ #define FLUSH_SYNC 1 /* wait for flush to complete */ #define FLUSH_INODE 2 /* flush the inode itself */ @@ -385,8 +334,7 @@ typedef struct vnodeops { * this inode out to disk */ /* - * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and - * VOP_FLUSH_PAGES. + * Flush/Invalidate options for vop_toss/flush/flushinval_pages. */ #define FI_NONE 0 /* none */ #define FI_REMAPF 1 /* Do a remapf prior to the operation */ @@ -398,7 +346,7 @@ typedef struct vnodeops { * Vnode attributes. va_mask indicates those attributes the caller * wants to set or extract. */ -typedef struct vattr { +typedef struct bhv_vattr { int va_mask; /* bit-mask of attributes present */ mode_t va_mode; /* file access mode and type */ xfs_nlink_t va_nlink; /* number of references to file */ @@ -418,7 +366,7 @@ typedef struct vattr { u_long va_nextents; /* number of extents in file */ u_long va_anextents; /* number of attr extents in file */ prid_t va_projid; /* project id */ -} vattr_t; +} bhv_vattr_t; /* * setattr or getattr attributes @@ -492,29 +440,17 @@ typedef struct vattr { (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); -extern vnode_t *vn_initialize(struct inode *); - -/* - * vnode_map structures _must_ match vn_epoch and vnode structure sizes. - */ -typedef struct vnode_map { - vfs_t *v_vfsp; - vnumber_t v_number; /* in-core vnode number */ - xfs_ino_t v_ino; /* inode # */ -} vmap_t; - -#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \ - (vmap).v_number = (vp)->v_number, \ - (vmap).v_ino = (vp)->v_inode.i_ino; } +extern bhv_vnode_t *vn_initialize(struct inode *); +extern int vn_revalidate(struct bhv_vnode *); +extern int __vn_revalidate(struct bhv_vnode *, bhv_vattr_t *); +extern void vn_revalidate_core(struct bhv_vnode *, bhv_vattr_t *); -extern int vn_revalidate(struct vnode *); -extern int __vn_revalidate(struct vnode *, vattr_t *); -extern void vn_revalidate_core(struct vnode *, vattr_t *); +extern void vn_iowait(struct bhv_vnode *vp); +extern void vn_iowake(struct bhv_vnode *vp); -extern void vn_iowait(struct vnode *vp); -extern void vn_iowake(struct vnode *vp); +extern void vn_ioerror(struct bhv_vnode *vp, int error, char *f, int l); -static inline int vn_count(struct vnode *vp) +static inline int vn_count(struct bhv_vnode *vp) { return atomic_read(&vn_to_inode(vp)->i_count); } @@ -522,7 +458,7 @@ static inline int vn_count(struct vnode *vp) /* * Vnode reference counting functions (and macros for compatibility). */ -extern vnode_t *vn_hold(struct vnode *); +extern bhv_vnode_t *vn_hold(struct bhv_vnode *); #if defined(XFS_VNODE_TRACE) #define VN_HOLD(vp) \ @@ -536,7 +472,7 @@ extern vnode_t *vn_hold(struct vnode *); #define VN_RELE(vp) (iput(vn_to_inode(vp))) #endif -static inline struct vnode *vn_grab(struct vnode *vp) +static inline struct bhv_vnode *vn_grab(struct bhv_vnode *vp) { struct inode *inode = igrab(vn_to_inode(vp)); return inode ? vn_from_inode(inode) : NULL; @@ -554,32 +490,39 @@ static inline struct vnode *vn_grab(struct vnode *vp) */ #define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock) #define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s) -#define VN_FLAGSET(vp,b) vn_flagset(vp,b) -#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b) -static __inline__ void vn_flagset(struct vnode *vp, uint flag) +static __inline__ void vn_flagset(struct bhv_vnode *vp, uint flag) { spin_lock(&vp->v_lock); vp->v_flag |= flag; spin_unlock(&vp->v_lock); } -static __inline__ void vn_flagclr(struct vnode *vp, uint flag) +static __inline__ uint vn_flagclr(struct bhv_vnode *vp, uint flag) { + uint cleared; + spin_lock(&vp->v_lock); + cleared = (vp->v_flag & flag); vp->v_flag &= ~flag; spin_unlock(&vp->v_lock); + return cleared; } +#define VMODIFY(vp) vn_flagset(vp, VMODIFIED) +#define VUNMODIFY(vp) vn_flagclr(vp, VMODIFIED) +#define VTRUNCATE(vp) vn_flagset(vp, VTRUNCATED) +#define VUNTRUNCATE(vp) vn_flagclr(vp, VTRUNCATED) + /* * Dealing with bad inodes */ -static inline void vn_mark_bad(struct vnode *vp) +static inline void vn_mark_bad(struct bhv_vnode *vp) { make_bad_inode(vn_to_inode(vp)); } -static inline int VN_BAD(struct vnode *vp) +static inline int VN_BAD(struct bhv_vnode *vp) { return is_bad_inode(vn_to_inode(vp)); } @@ -587,18 +530,18 @@ static inline int VN_BAD(struct vnode *vp) /* * Extracting atime values in various formats */ -static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime) +static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime) { bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec; bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec; } -static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts) +static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts) { *ts = vp->v_inode.i_atime; } -static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) +static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) { *tt = vp->v_inode.i_atime.tv_sec; } @@ -610,11 +553,10 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) #define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) #define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ PAGECACHE_TAG_DIRTY) -#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) -#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED) +#define VN_TRUNC(vp) ((vp)->v_flag & VTRUNCATED) /* - * Flags to VOP_SETATTR/VOP_GETATTR. + * Flags to vop_setattr/getattr. */ #define ATTR_UTIME 0x01 /* non-default utime(2) request */ #define ATTR_DMI 0x08 /* invocation from a DMI function */ @@ -624,7 +566,7 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ /* - * Flags to VOP_FSYNC and VOP_RECLAIM. + * Flags to vop_fsync/reclaim. */ #define FSYNC_NOWAIT 0 /* asynchronous flush */ #define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ @@ -643,11 +585,11 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) #define VNODE_KTRACE_REF 4 #define VNODE_KTRACE_RELE 5 -extern void vn_trace_entry(struct vnode *, const char *, inst_t *); -extern void vn_trace_exit(struct vnode *, const char *, inst_t *); -extern void vn_trace_hold(struct vnode *, char *, int, inst_t *); -extern void vn_trace_ref(struct vnode *, char *, int, inst_t *); -extern void vn_trace_rele(struct vnode *, char *, int, inst_t *); +extern void vn_trace_entry(struct bhv_vnode *, const char *, inst_t *); +extern void vn_trace_exit(struct bhv_vnode *, const char *, inst_t *); +extern void vn_trace_hold(struct bhv_vnode *, char *, int, inst_t *); +extern void vn_trace_ref(struct bhv_vnode *, char *, int, inst_t *); +extern void vn_trace_rele(struct bhv_vnode *, char *, int, inst_t *); #define VN_TRACE(vp) \ vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address) diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 772ac48329ea..3aa771531856 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -444,7 +442,7 @@ xfs_qm_dqalloc( XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), - &map, &nmaps, &flist))) { + &map, &nmaps, &flist, NULL))) { goto error0; } ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); @@ -559,7 +557,7 @@ xfs_qm_dqtobp( error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - NULL, 0, &map, &nmaps, NULL); + NULL, 0, &map, &nmaps, NULL, NULL); xfs_iunlock(quotip, XFS_ILOCK_SHARED); if (error) @@ -1261,7 +1259,7 @@ xfs_qm_dqflush( if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { - xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE); + xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); return XFS_ERROR(EIO); } diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index c0c629663a5c..78d3ab95c5fd 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -119,7 +119,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) */ #define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ (dqp)->dq_flags |= XFS_DQ_FLOCKED; } -#define xfs_dqfunlock(dqp) { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \ +#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \ vsema(&((dqp)->q_flock)); \ (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } @@ -128,7 +128,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) #define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \ &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s) -#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0) +#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock))) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 546f48af882a..5b2dcc58b244 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -248,7 +246,7 @@ xfs_qm_dquot_logitem_pushbuf( * inode flush completed and the inode was taken off the AIL. * So, just get out. */ - if ((valusema(&(dqp->q_flock)) > 0) || + if (!issemalocked(&(dqp->q_flock)) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); @@ -261,7 +259,7 @@ xfs_qm_dquot_logitem_pushbuf( if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - (valusema(&(dqp->q_flock)) <= 0)); + issemalocked(&(dqp->q_flock))); qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 7fb5eca9bd50..e23e45535c48 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -33,7 +32,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -1603,7 +1601,7 @@ xfs_qm_dqiterate( maxlblkcnt - lblkno, XFS_BMAPI_METADATA, NULL, - 0, map, &nmaps, NULL); + 0, map, &nmaps, NULL, NULL); xfs_iunlock(qip, XFS_ILOCK_SHARED); if (error) break; @@ -1905,9 +1903,7 @@ xfs_qm_quotacheck( */ if ((error = xfs_bulkstat(mp, &lastino, &count, xfs_qm_dqusage_adjust, NULL, - structsz, NULL, - BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED, - &done))) + structsz, NULL, BULKSTAT_FG_IGET, &done))) break; } while (! done); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 6838b36d95a9..e95e99f7168f 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -33,7 +32,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -129,7 +127,7 @@ xfs_qm_parseargs( return XFS_ERROR(EINVAL); } - PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error); + error = bhv_next_vfs_parseargs(BHV_NEXT(bhv), options, args, update); if (!error && !referenced) bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM); return error; @@ -140,9 +138,8 @@ xfs_qm_showargs( struct bhv_desc *bhv, struct seq_file *m) { - struct vfs *vfsp = bhvtovfs(bhv); + struct bhv_vfs *vfsp = bhvtovfs(bhv); struct xfs_mount *mp = XFS_VFSTOM(vfsp); - int error; if (mp->m_qflags & XFS_UQUOTA_ACCT) { (mp->m_qflags & XFS_UQUOTA_ENFD) ? @@ -165,8 +162,7 @@ xfs_qm_showargs( if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, "," MNTOPT_NOQUOTA); - PVFS_SHOWARGS(BHV_NEXT(bhv), m, error); - return error; + return bhv_next_vfs_showargs(BHV_NEXT(bhv), m); } STATIC int @@ -175,14 +171,67 @@ xfs_qm_mount( struct xfs_mount_args *args, struct cred *cr) { - struct vfs *vfsp = bhvtovfs(bhv); + struct bhv_vfs *vfsp = bhvtovfs(bhv); struct xfs_mount *mp = XFS_VFSTOM(vfsp); - int error; if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA)) xfs_qm_mount_quotainit(mp, args->flags); - PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error); - return error; + return bhv_next_vfs_mount(BHV_NEXT(bhv), args, cr); +} + +/* + * Directory tree accounting is implemented using project quotas, where + * the project identifier is inherited from parent directories. + * A statvfs (df, etc.) of a directory that is using project quota should + * return a statvfs of the project, not the entire filesystem. + * This makes such trees appear as if they are filesystems in themselves. + */ +STATIC int +xfs_qm_statvfs( + struct bhv_desc *bhv, + bhv_statvfs_t *statp, + struct bhv_vnode *vnode) +{ + xfs_mount_t *mp; + xfs_inode_t *ip; + xfs_dquot_t *dqp; + xfs_disk_dquot_t *dp; + __uint64_t limit; + int error; + + error = bhv_next_vfs_statvfs(BHV_NEXT(bhv), statp, vnode); + if (error || !vnode) + return error; + + mp = XFS_BHVTOM(bhv); + ip = xfs_vtoi(vnode); + + if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) + return 0; + if (!(mp->m_qflags & XFS_PQUOTA_ACCT)) + return 0; + if (!(mp->m_qflags & XFS_OQUOTA_ENFD)) + return 0; + + if (xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) + return 0; + dp = &dqp->q_core; + + limit = dp->d_blk_softlimit ? dp->d_blk_softlimit : dp->d_blk_hardlimit; + if (limit && statp->f_blocks > limit) { + statp->f_blocks = limit; + statp->f_bfree = (statp->f_blocks > dp->d_bcount) ? + (statp->f_blocks - dp->d_bcount) : 0; + } + limit = dp->d_ino_softlimit ? dp->d_ino_softlimit : dp->d_ino_hardlimit; + if (limit && statp->f_files > limit) { + statp->f_files = limit; + statp->f_ffree = (statp->f_files > dp->d_icount) ? + (statp->f_ffree - dp->d_icount) : 0; + } + + xfs_qm_dqput(dqp); + return 0; } STATIC int @@ -191,7 +240,7 @@ xfs_qm_syncall( int flags, cred_t *credp) { - struct vfs *vfsp = bhvtovfs(bhv); + struct bhv_vfs *vfsp = bhvtovfs(bhv); struct xfs_mount *mp = XFS_VFSTOM(vfsp); int error; @@ -210,8 +259,7 @@ xfs_qm_syncall( } } } - PVFS_SYNC(BHV_NEXT(bhv), flags, credp, error); - return error; + return bhv_next_vfs_sync(BHV_NEXT(bhv), flags, credp); } STATIC int @@ -346,11 +394,12 @@ STATIC struct xfs_qmops xfs_qmcore_xfs = { .xfs_dqtrxops = &xfs_trans_dquot_ops, }; -struct bhv_vfsops xfs_qmops = { { +struct bhv_module_vfsops xfs_qmops = { { BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM), .vfs_parseargs = xfs_qm_parseargs, .vfs_showargs = xfs_qm_showargs, .vfs_mount = xfs_qm_mount, + .vfs_statvfs = xfs_qm_statvfs, .vfs_sync = xfs_qm_syncall, .vfs_quotactl = xfs_qm_quotactl, }, }; diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 0570f7733550..6f858fb81a36 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c55db463bbf2..ed620c4d1594 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -26,7 +26,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -35,7 +34,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -91,8 +89,8 @@ xfs_qm_quotactl( xfs_caddr_t addr) { xfs_mount_t *mp; + bhv_vfs_t *vfsp; int error; - struct vfs *vfsp; vfsp = bhvtovfs(bdp); mp = XFS_VFSTOM(vfsp); @@ -1035,7 +1033,7 @@ xfs_qm_dqrele_all_inodes( { xfs_inode_t *ip, *topino; uint ireclaims; - vnode_t *vp; + bhv_vnode_t *vp; boolean_t vnode_refd; ASSERT(mp->m_quotainfo); diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 9168918db252..0242e9666e8e 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -33,7 +32,6 @@ #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" #include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index b08b3d9345b7..36fbeccdc722 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -47,7 +47,7 @@ cmn_err(register int level, char *fmt, ...) va_start(ap, fmt); if (*fmt == '!') fp++; len = vsprintf(message, fp, ap); - if (message[len-1] != '\n') + if (level != CE_DEBUG && message[len-1] != '\n') strcat(message, "\n"); printk("%s%s", err_level[level], message); va_end(ap); @@ -68,7 +68,7 @@ icmn_err(register int level, char *fmt, va_list ap) level = XFS_MAX_ERR_LEVEL; spin_lock_irqsave(&xfs_err_lock,flags); len = vsprintf(message, fmt, ap); - if (message[len-1] != '\n') + if (level != CE_DEBUG && message[len-1] != '\n') strcat(message, "\n"); spin_unlock_irqrestore(&xfs_err_lock,flags); printk("%s%s", err_level[level], message); diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index e3bf58112e7e..4f54dca662a8 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -33,9 +33,6 @@ extern void cmn_err(int, char *, ...) __attribute__ ((format (printf, 2, 3))); extern void assfail(char *expr, char *f, int l); -#define prdev(fmt,targ,args...) \ - printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args) - #define ASSERT_ALWAYS(expr) \ (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 2539af34eb63..4b0cb474be4c 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -21,12 +21,10 @@ #include "xfs_bit.h" #include "xfs_inum.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -39,15 +37,15 @@ #include <linux/capability.h> #include <linux/posix_acl_xattr.h> -STATIC int xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *); +STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *); STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); STATIC void xfs_acl_get_endian(xfs_acl_t *); STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); STATIC int xfs_acl_invalid(xfs_acl_t *); STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_attr(vnode_t *, xfs_acl_t *, int, int, int *); -STATIC void xfs_acl_set_attr(vnode_t *, xfs_acl_t *, int, int *); -STATIC int xfs_acl_allow_set(vnode_t *, int); +STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *); +STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *); +STATIC int xfs_acl_allow_set(bhv_vnode_t *, int); kmem_zone_t *xfs_acl_zone; @@ -57,7 +55,7 @@ kmem_zone_t *xfs_acl_zone; */ int xfs_acl_vhasacl_access( - vnode_t *vp) + bhv_vnode_t *vp) { int error; @@ -70,7 +68,7 @@ xfs_acl_vhasacl_access( */ int xfs_acl_vhasacl_default( - vnode_t *vp) + bhv_vnode_t *vp) { int error; @@ -209,7 +207,7 @@ posix_acl_xfs_to_xattr( int xfs_acl_vget( - vnode_t *vp, + bhv_vnode_t *vp, void *acl, size_t size, int kind) @@ -241,10 +239,10 @@ xfs_acl_vget( goto out; } if (kind == _ACL_TYPE_ACCESS) { - vattr_t va; + bhv_vattr_t va; va.va_mask = XFS_AT_MODE; - VOP_GETATTR(vp, &va, 0, sys_cred, error); + error = bhv_vop_getattr(vp, &va, 0, sys_cred); if (error) goto out; xfs_acl_sync_mode(va.va_mode, xfs_acl); @@ -260,7 +258,7 @@ out: int xfs_acl_vremove( - vnode_t *vp, + bhv_vnode_t *vp, int kind) { int error; @@ -268,9 +266,9 @@ xfs_acl_vremove( VN_HOLD(vp); error = xfs_acl_allow_set(vp, kind); if (!error) { - VOP_ATTR_REMOVE(vp, kind == _ACL_TYPE_DEFAULT? - SGI_ACL_DEFAULT: SGI_ACL_FILE, - ATTR_ROOT, sys_cred, error); + error = bhv_vop_attr_remove(vp, kind == _ACL_TYPE_DEFAULT? + SGI_ACL_DEFAULT: SGI_ACL_FILE, + ATTR_ROOT, sys_cred); if (error == ENOATTR) error = 0; /* 'scool */ } @@ -280,7 +278,7 @@ xfs_acl_vremove( int xfs_acl_vset( - vnode_t *vp, + bhv_vnode_t *vp, void *acl, size_t size, int kind) @@ -370,10 +368,10 @@ xfs_acl_iaccess( STATIC int xfs_acl_allow_set( - vnode_t *vp, + bhv_vnode_t *vp, int kind) { - vattr_t va; + bhv_vattr_t va; int error; if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) @@ -383,7 +381,7 @@ xfs_acl_allow_set( if (vp->v_vfsp->vfs_flag & VFS_RDONLY) return EROFS; va.va_mask = XFS_AT_UID; - VOP_GETATTR(vp, &va, 0, NULL, error); + error = bhv_vop_getattr(vp, &va, 0, NULL); if (error) return error; if (va.va_uid != current->fsuid && !capable(CAP_FOWNER)) @@ -606,7 +604,7 @@ xfs_acl_get_endian( */ STATIC void xfs_acl_get_attr( - vnode_t *vp, + bhv_vnode_t *vp, xfs_acl_t *aclp, int kind, int flags, @@ -616,9 +614,9 @@ xfs_acl_get_attr( ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); flags |= ATTR_ROOT; - VOP_ATTR_GET(vp, - kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT, - (char *)aclp, &len, flags, sys_cred, *error); + *error = bhv_vop_attr_get(vp, kind == _ACL_TYPE_ACCESS ? + SGI_ACL_FILE : SGI_ACL_DEFAULT, + (char *)aclp, &len, flags, sys_cred); if (*error || (flags & ATTR_KERNOVAL)) return; xfs_acl_get_endian(aclp); @@ -629,7 +627,7 @@ xfs_acl_get_attr( */ STATIC void xfs_acl_set_attr( - vnode_t *vp, + bhv_vnode_t *vp, xfs_acl_t *aclp, int kind, int *error) @@ -654,19 +652,19 @@ xfs_acl_set_attr( INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); } INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - VOP_ATTR_SET(vp, - kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT, - (char *)newacl, len, ATTR_ROOT, sys_cred, *error); + *error = bhv_vop_attr_set(vp, kind == _ACL_TYPE_ACCESS ? + SGI_ACL_FILE: SGI_ACL_DEFAULT, + (char *)newacl, len, ATTR_ROOT, sys_cred); _ACL_FREE(newacl); } int xfs_acl_vtoacl( - vnode_t *vp, + bhv_vnode_t *vp, xfs_acl_t *access_acl, xfs_acl_t *default_acl) { - vattr_t va; + bhv_vattr_t va; int error = 0; if (access_acl) { @@ -678,7 +676,7 @@ xfs_acl_vtoacl( if (!error) { /* Got the ACL, need the mode... */ va.va_mask = XFS_AT_MODE; - VOP_GETATTR(vp, &va, 0, sys_cred, error); + error = bhv_vop_getattr(vp, &va, 0, sys_cred); } if (error) @@ -701,8 +699,8 @@ xfs_acl_vtoacl( */ int xfs_acl_inherit( - vnode_t *vp, - vattr_t *vap, + bhv_vnode_t *vp, + bhv_vattr_t *vap, xfs_acl_t *pdaclp) { xfs_acl_t *cacl; @@ -757,11 +755,11 @@ xfs_acl_inherit( */ STATIC int xfs_acl_setmode( - vnode_t *vp, + bhv_vnode_t *vp, xfs_acl_t *acl, int *basicperms) { - vattr_t va; + bhv_vattr_t va; xfs_acl_entry_t *ap; xfs_acl_entry_t *gap = NULL; int i, error, nomask = 1; @@ -776,7 +774,7 @@ xfs_acl_setmode( * mode. The m:: bits take precedence over the g:: bits. */ va.va_mask = XFS_AT_MODE; - VOP_GETATTR(vp, &va, 0, sys_cred, error); + error = bhv_vop_getattr(vp, &va, 0, sys_cred); if (error) return error; @@ -810,8 +808,7 @@ xfs_acl_setmode( if (gap && nomask) va.va_mode |= gap->ae_perm << 3; - VOP_SETATTR(vp, &va, 0, sys_cred, error); - return error; + return bhv_vop_setattr(vp, &va, 0, sys_cred); } /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 538d0d65b04c..f853cf1a6270 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -50,7 +50,7 @@ typedef struct xfs_acl { #ifdef CONFIG_XFS_POSIX_ACL struct vattr; -struct vnode; +struct bhv_vnode; struct xfs_inode; extern struct kmem_zone *xfs_acl_zone; @@ -58,14 +58,14 @@ extern struct kmem_zone *xfs_acl_zone; (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) #define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) -extern int xfs_acl_inherit(struct vnode *, struct vattr *, xfs_acl_t *); +extern int xfs_acl_inherit(struct bhv_vnode *, struct bhv_vattr *, xfs_acl_t *); extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); -extern int xfs_acl_vtoacl(struct vnode *, xfs_acl_t *, xfs_acl_t *); -extern int xfs_acl_vhasacl_access(struct vnode *); -extern int xfs_acl_vhasacl_default(struct vnode *); -extern int xfs_acl_vset(struct vnode *, void *, size_t, int); -extern int xfs_acl_vget(struct vnode *, void *, size_t, int); -extern int xfs_acl_vremove(struct vnode *vp, int); +extern int xfs_acl_vtoacl(struct bhv_vnode *, xfs_acl_t *, xfs_acl_t *); +extern int xfs_acl_vhasacl_access(struct bhv_vnode *); +extern int xfs_acl_vhasacl_default(struct bhv_vnode *); +extern int xfs_acl_vset(struct bhv_vnode *, void *, size_t, int); +extern int xfs_acl_vget(struct bhv_vnode *, void *, size_t, int); +extern int xfs_acl_vremove(struct bhv_vnode *, int); #define _ACL_TYPE_ACCESS 1 #define _ACL_TYPE_DEFAULT 2 diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 8558226281c4..eef6763f3a67 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -1862,7 +1860,7 @@ xfs_alloc_fix_freelist( (pag->pagf_longest - delta) : (pag->pagf_flcount > 0 || pag->pagf_longest > 0); if (args->minlen + args->alignment + args->minalignslop - 1 > longest || - (args->minleft && + (!(flags & XFS_ALLOC_FLAG_FREEING) && (int)(pag->pagf_freeblks + pag->pagf_flcount - need - args->total) < (int)args->minleft)) { @@ -1898,7 +1896,7 @@ xfs_alloc_fix_freelist( longest = (longest > delta) ? (longest - delta) : (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); if (args->minlen + args->alignment + args->minalignslop - 1 > longest || - (args->minleft && + (!(flags & XFS_ALLOC_FLAG_FREEING) && (int)(be32_to_cpu(agf->agf_freeblks) + be32_to_cpu(agf->agf_flcount) - need - args->total) < (int)args->minleft)) { @@ -1951,8 +1949,14 @@ xfs_alloc_fix_freelist( * the restrictions correctly. Can happen for free calls * on a completely full ag. */ - if (targs.agbno == NULLAGBLOCK) + if (targs.agbno == NULLAGBLOCK) { + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + xfs_trans_brelse(tp, agflbp); + args->agbp = NULL; + return 0; + } break; + } /* * Put each allocated block on the list. */ @@ -2360,8 +2364,19 @@ xfs_alloc_vextent( if (args->agno == sagno && type == XFS_ALLOCTYPE_START_BNO) args->type = XFS_ALLOCTYPE_THIS_AG; - if (++(args->agno) == mp->m_sb.sb_agcount) - args->agno = 0; + /* + * For the first allocation, we can try any AG to get + * space. However, if we already have allocated a + * block, we don't want to try AGs whose number is below + * sagno. Otherwise, we may end up with out-of-order + * locking of AGF, which might cause deadlock. + */ + if (++(args->agno) == mp->m_sb.sb_agcount) { + if (args->firstblock != NULLFSBLOCK) + args->agno = sagno; + else + args->agno = 0; + } /* * Reached the starting a.g., must either be done * or switch to non-trylock mode. @@ -2443,7 +2458,7 @@ xfs_free_extent( args.minlen = args.minleft = args.minalignslop = 0; down_read(&args.mp->m_peraglock); args.pag = &args.mp->m_perag[args.agno]; - if ((error = xfs_alloc_fix_freelist(&args, 0))) + if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; #ifdef DEBUG ASSERT(args.agbp != NULL); diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 2d1f8928b267..650591f999ae 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -41,6 +41,7 @@ typedef enum xfs_alloctype * Flags for xfs_alloc_fix_freelist. */ #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ +#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ /* * Argument structure for xfs_alloc routines. @@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg { char wasfromfl; /* set if allocation is from freelist */ char isfl; /* set if is freelist blocks - !acctg */ char userdata; /* set if this is user data */ + xfs_fsblock_t firstblock; /* io first block allocated */ } xfs_alloc_arg_t; /* diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index a1d92da86ccd..7446556e8021 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index b6e1e02bbb28..1a2101043275 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -27,7 +27,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -35,7 +34,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -1910,7 +1908,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, args->rmtblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - NULL, 0, map, &nmap, NULL); + NULL, 0, map, &nmap, NULL, NULL); if (error) return(error); ASSERT(nmap >= 1); @@ -1988,7 +1986,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, args->firstblock, args->total, &map, &nmap, - args->flist); + args->flist, NULL); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, *args->firstblock, &committed); @@ -2039,7 +2037,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno, args->rmtblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, 0, &map, &nmap, NULL); + args->firstblock, 0, &map, &nmap, + NULL, NULL); if (error) { return(error); } @@ -2104,7 +2103,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) args->rmtblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, args->firstblock, 0, &map, &nmap, - args->flist); + args->flist, NULL); if (error) { return(error); } @@ -2142,7 +2141,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) XFS_BMAP_INIT(args->flist, args->firstblock); error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - 1, args->firstblock, args->flist, &done); + 1, args->firstblock, args->flist, + NULL, &done); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, *args->firstblock, &committed); @@ -2322,56 +2322,56 @@ xfs_attr_trace_enter(int type, char *where, STATIC int posix_acl_access_set( - vnode_t *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS); } STATIC int posix_acl_access_remove( - struct vnode *vp, char *name, int xflags) + bhv_vnode_t *vp, char *name, int xflags) { return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); } STATIC int posix_acl_access_get( - vnode_t *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS); } STATIC int posix_acl_access_exists( - vnode_t *vp) + bhv_vnode_t *vp) { return xfs_acl_vhasacl_access(vp); } STATIC int posix_acl_default_set( - vnode_t *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT); } STATIC int posix_acl_default_get( - vnode_t *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT); } STATIC int posix_acl_default_remove( - struct vnode *vp, char *name, int xflags) + bhv_vnode_t *vp, char *name, int xflags) { return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT); } STATIC int posix_acl_default_exists( - vnode_t *vp) + bhv_vnode_t *vp) { return xfs_acl_vhasacl_default(vp); } @@ -2404,21 +2404,18 @@ STATIC struct attrnames *attr_system_names[] = STATIC int attr_generic_set( - struct vnode *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { - int error; - - VOP_ATTR_SET(vp, name, data, size, xflags, NULL, error); - return -error; + return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL); } STATIC int attr_generic_get( - struct vnode *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { int error, asize = size; - VOP_ATTR_GET(vp, name, data, &asize, xflags, NULL, error); + error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL); if (!error) return asize; return -error; @@ -2426,12 +2423,9 @@ attr_generic_get( STATIC int attr_generic_remove( - struct vnode *vp, char *name, int xflags) + bhv_vnode_t *vp, char *name, int xflags) { - int error; - - VOP_ATTR_REMOVE(vp, name, xflags, NULL, error); - return -error; + return -bhv_vop_attr_remove(vp, name, xflags, NULL); } STATIC int @@ -2459,7 +2453,7 @@ attr_generic_listadd( STATIC int attr_system_list( - struct vnode *vp, + bhv_vnode_t *vp, void *data, size_t size, ssize_t *result) @@ -2481,12 +2475,12 @@ attr_system_list( int attr_generic_list( - struct vnode *vp, void *data, size_t size, int xflags, ssize_t *result) + bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result) { attrlist_cursor_kern_t cursor = { 0 }; int error; - VOP_ATTR_LIST(vp, data, size, xflags, &cursor, NULL, error); + error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL); if (error > 0) return -error; *result = -error; @@ -2514,7 +2508,7 @@ attr_lookup_namespace( */ STATIC int attr_user_capable( - struct vnode *vp, + bhv_vnode_t *vp, cred_t *cred) { struct inode *inode = vn_to_inode(vp); @@ -2532,7 +2526,7 @@ attr_user_capable( STATIC int attr_trusted_capable( - struct vnode *vp, + bhv_vnode_t *vp, cred_t *cred) { struct inode *inode = vn_to_inode(vp); @@ -2546,7 +2540,7 @@ attr_trusted_capable( STATIC int attr_secure_capable( - struct vnode *vp, + bhv_vnode_t *vp, cred_t *cred) { return -ENOSECURITY; @@ -2554,7 +2548,7 @@ attr_secure_capable( STATIC int attr_system_set( - struct vnode *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { attrnames_t *namesp; int error; @@ -2573,7 +2567,7 @@ attr_system_set( STATIC int attr_system_get( - struct vnode *vp, char *name, void *data, size_t size, int xflags) + bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) { attrnames_t *namesp; @@ -2585,7 +2579,7 @@ attr_system_get( STATIC int attr_system_remove( - struct vnode *vp, char *name, int xflags) + bhv_vnode_t *vp, char *name, int xflags) { attrnames_t *namesp; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index b2c7b9fcded3..981633f6c077 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -36,13 +36,13 @@ *========================================================================*/ struct cred; -struct vnode; +struct bhv_vnode; -typedef int (*attrset_t)(struct vnode *, char *, void *, size_t, int); -typedef int (*attrget_t)(struct vnode *, char *, void *, size_t, int); -typedef int (*attrremove_t)(struct vnode *, char *, int); -typedef int (*attrexists_t)(struct vnode *); -typedef int (*attrcapable_t)(struct vnode *, struct cred *); +typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int); +typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int); +typedef int (*attrremove_t)(struct bhv_vnode *, char *, int); +typedef int (*attrexists_t)(struct bhv_vnode *); +typedef int (*attrcapable_t)(struct bhv_vnode *, struct cred *); typedef struct attrnames { char * attr_name; @@ -63,7 +63,7 @@ extern struct attrnames attr_trusted; extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT]; extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int); -extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *); +extern int attr_generic_list(struct bhv_vnode *, void *, size_t, int, ssize_t *); #define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ #define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 9462be86aa14..9455051f0120 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -34,7 +33,6 @@ #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -2990,7 +2988,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, nmap = 1; error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - NULL, 0, &map, &nmap, NULL); + NULL, 0, &map, &nmap, NULL, NULL); if (error) { return(error); } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 26939d364bc4..3a6137539064 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -24,13 +24,11 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -40,13 +38,15 @@ #include "xfs_mount.h" #include "xfs_ialloc.h" #include "xfs_itable.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_dir_leaf.h" #include "xfs_attr_leaf.h" #include "xfs_rw.h" #include "xfs_quota.h" @@ -101,6 +101,7 @@ xfs_bmap_add_extent( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd); /* OK to allocate reserved blocks */ @@ -118,6 +119,7 @@ xfs_bmap_add_extent_delay_real( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd); /* OK to allocate reserved blocks */ /* @@ -131,6 +133,7 @@ xfs_bmap_add_extent_hole_delay( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd); /* OK to allocate reserved blocks */ /* @@ -144,6 +147,7 @@ xfs_bmap_add_extent_hole_real( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork); /* data or attr fork */ /* @@ -156,7 +160,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp); /* inode logging flags */ + int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta); /* Change made to incore extents */ /* * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. @@ -203,6 +208,7 @@ xfs_bmap_del_extent( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd); /* OK to allocate reserved blocks */ @@ -510,7 +516,7 @@ xfs_bmap_add_attrfork_local( dargs.total = mp->m_dirblkfsbs; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; - error = XFS_DIR_SHORTFORM_TO_SINGLE(mp, &dargs); + error = xfs_dir2_sf_to_block(&dargs); } else error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, XFS_DATA_FORK); @@ -530,6 +536,7 @@ xfs_bmap_add_extent( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd) /* OK to use reserved data blocks */ { @@ -567,6 +574,15 @@ xfs_bmap_add_extent( logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); } else logflags = 0; + /* DELTA: single new extent */ + if (delta) { + if (delta->xed_startoff > new->br_startoff) + delta->xed_startoff = new->br_startoff; + if (delta->xed_blockcount < + new->br_startoff + new->br_blockcount) + delta->xed_blockcount = new->br_startoff + + new->br_blockcount; + } } /* * Any kind of new delayed allocation goes here. @@ -576,7 +592,7 @@ xfs_bmap_add_extent( ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new, - &logflags, rsvd))) + &logflags, delta, rsvd))) goto done; } /* @@ -587,7 +603,7 @@ xfs_bmap_add_extent( ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, - &logflags, whichfork))) + &logflags, delta, whichfork))) goto done; } else { xfs_bmbt_irec_t prev; /* old extent at offset idx */ @@ -612,17 +628,17 @@ xfs_bmap_add_extent( XFS_BTCUR_BPRV_WASDEL); if ((error = xfs_bmap_add_extent_delay_real(ip, idx, &cur, new, &da_new, first, flist, - &logflags, rsvd))) + &logflags, delta, rsvd))) goto done; } else if (new->br_state == XFS_EXT_NORM) { ASSERT(new->br_state == XFS_EXT_NORM); if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) + ip, idx, &cur, new, &logflags, delta))) goto done; } else { ASSERT(new->br_state == XFS_EXT_UNWRITTEN); if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) + ip, idx, &cur, new, &logflags, delta))) goto done; } ASSERT(*curp == cur || *curp == NULL); @@ -635,7 +651,7 @@ xfs_bmap_add_extent( ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, - new, &logflags, whichfork))) + new, &logflags, delta, whichfork))) goto done; } } @@ -700,6 +716,7 @@ xfs_bmap_add_extent_delay_real( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd) /* OK to use reserved data block allocation */ { xfs_btree_cur_t *cur; /* btree cursor */ @@ -716,8 +733,8 @@ xfs_bmap_add_extent_delay_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ - xfs_filblks_t temp; /* value for dnew calculations */ - xfs_filblks_t temp2; /* value for dnew calculations */ + xfs_filblks_t temp=0; /* value for dnew calculations */ + xfs_filblks_t temp2=0;/* value for dnew calculations */ int tmp_rval; /* partial logging flags */ enum { /* bit number definitions for state */ LEFT_CONTIG, RIGHT_CONTIG, @@ -839,6 +856,11 @@ xfs_bmap_add_extent_delay_real( goto done; } *dnew = 0; + /* DELTA: Three in-core extents are replaced by one. */ + temp = LEFT.br_startoff; + temp2 = LEFT.br_blockcount + + PREV.br_blockcount + + RIGHT.br_blockcount; break; case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): @@ -872,6 +894,10 @@ xfs_bmap_add_extent_delay_real( goto done; } *dnew = 0; + /* DELTA: Two in-core extents are replaced by one. */ + temp = LEFT.br_startoff; + temp2 = LEFT.br_blockcount + + PREV.br_blockcount; break; case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): @@ -906,6 +932,10 @@ xfs_bmap_add_extent_delay_real( goto done; } *dnew = 0; + /* DELTA: Two in-core extents are replaced by one. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount + + RIGHT.br_blockcount; break; case MASK2(LEFT_FILLING, RIGHT_FILLING): @@ -936,6 +966,9 @@ xfs_bmap_add_extent_delay_real( ASSERT(i == 1); } *dnew = 0; + /* DELTA: The in-core extent described by new changed type. */ + temp = new->br_startoff; + temp2 = new->br_blockcount; break; case MASK2(LEFT_FILLING, LEFT_CONTIG): @@ -978,6 +1011,10 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, XFS_DATA_FORK); *dnew = temp; + /* DELTA: The boundary between two in-core extents moved. */ + temp = LEFT.br_startoff; + temp2 = LEFT.br_blockcount + + PREV.br_blockcount; break; case MASK(LEFT_FILLING): @@ -1025,6 +1062,9 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, XFS_DATA_FORK); *dnew = temp; + /* DELTA: One in-core extent is split in two. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount; break; case MASK2(RIGHT_FILLING, RIGHT_CONTIG): @@ -1067,6 +1107,10 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, XFS_DATA_FORK); *dnew = temp; + /* DELTA: The boundary between two in-core extents moved. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount + + RIGHT.br_blockcount; break; case MASK(RIGHT_FILLING): @@ -1112,6 +1156,9 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); *dnew = temp; + /* DELTA: One in-core extent is split in two. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount; break; case 0: @@ -1194,6 +1241,9 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, XFS_DATA_FORK); *dnew = temp + temp2; + /* DELTA: One in-core extent is split in three. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount; break; case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): @@ -1209,6 +1259,13 @@ xfs_bmap_add_extent_delay_real( ASSERT(0); } *curp = cur; + if (delta) { + temp2 += temp; + if (delta->xed_startoff > temp) + delta->xed_startoff = temp; + if (delta->xed_blockcount < temp2) + delta->xed_blockcount = temp2; + } done: *logflagsp = rval; return error; @@ -1235,7 +1292,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp) /* inode logging flags */ + int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta) /* Change made to incore extents */ { xfs_btree_cur_t *cur; /* btree cursor */ xfs_bmbt_rec_t *ep; /* extent entry for idx */ @@ -1252,6 +1310,8 @@ xfs_bmap_add_extent_unwritten_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ + xfs_filblks_t temp=0; + xfs_filblks_t temp2=0; enum { /* bit number definitions for state */ LEFT_CONTIG, RIGHT_CONTIG, LEFT_FILLING, RIGHT_FILLING, @@ -1380,6 +1440,11 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount, LEFT.br_state))) goto done; } + /* DELTA: Three in-core extents are replaced by one. */ + temp = LEFT.br_startoff; + temp2 = LEFT.br_blockcount + + PREV.br_blockcount + + RIGHT.br_blockcount; break; case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): @@ -1419,6 +1484,10 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_state))) goto done; } + /* DELTA: Two in-core extents are replaced by one. */ + temp = LEFT.br_startoff; + temp2 = LEFT.br_blockcount + + PREV.br_blockcount; break; case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): @@ -1459,6 +1528,10 @@ xfs_bmap_add_extent_unwritten_real( newext))) goto done; } + /* DELTA: Two in-core extents are replaced by one. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount + + RIGHT.br_blockcount; break; case MASK2(LEFT_FILLING, RIGHT_FILLING): @@ -1487,6 +1560,9 @@ xfs_bmap_add_extent_unwritten_real( newext))) goto done; } + /* DELTA: The in-core extent described by new changed type. */ + temp = new->br_startoff; + temp2 = new->br_blockcount; break; case MASK2(LEFT_FILLING, LEFT_CONTIG): @@ -1534,6 +1610,10 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_state)) goto done; } + /* DELTA: The boundary between two in-core extents moved. */ + temp = LEFT.br_startoff; + temp2 = LEFT.br_blockcount + + PREV.br_blockcount; break; case MASK(LEFT_FILLING): @@ -1574,6 +1654,9 @@ xfs_bmap_add_extent_unwritten_real( goto done; ASSERT(i == 1); } + /* DELTA: One in-core extent is split in two. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount; break; case MASK2(RIGHT_FILLING, RIGHT_CONTIG): @@ -1617,6 +1700,10 @@ xfs_bmap_add_extent_unwritten_real( newext))) goto done; } + /* DELTA: The boundary between two in-core extents moved. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount + + RIGHT.br_blockcount; break; case MASK(RIGHT_FILLING): @@ -1657,6 +1744,9 @@ xfs_bmap_add_extent_unwritten_real( goto done; ASSERT(i == 1); } + /* DELTA: One in-core extent is split in two. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount; break; case 0: @@ -1710,6 +1800,9 @@ xfs_bmap_add_extent_unwritten_real( goto done; ASSERT(i == 1); } + /* DELTA: One in-core extent is split in three. */ + temp = PREV.br_startoff; + temp2 = PREV.br_blockcount; break; case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): @@ -1725,6 +1818,13 @@ xfs_bmap_add_extent_unwritten_real( ASSERT(0); } *curp = cur; + if (delta) { + temp2 += temp; + if (delta->xed_startoff > temp) + delta->xed_startoff = temp; + if (delta->xed_blockcount < temp2) + delta->xed_blockcount = temp2; + } done: *logflagsp = rval; return error; @@ -1753,6 +1853,7 @@ xfs_bmap_add_extent_hole_delay( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd) /* OK to allocate reserved blocks */ { xfs_bmbt_rec_t *ep; /* extent record for idx */ @@ -1765,7 +1866,8 @@ xfs_bmap_add_extent_hole_delay( xfs_filblks_t oldlen=0; /* old indirect size */ xfs_bmbt_irec_t right; /* right neighbor extent entry */ int state; /* state bits, accessed thru macros */ - xfs_filblks_t temp; /* temp for indirect calculations */ + xfs_filblks_t temp=0; /* temp for indirect calculations */ + xfs_filblks_t temp2=0; enum { /* bit number definitions for state */ LEFT_CONTIG, RIGHT_CONTIG, LEFT_DELAY, RIGHT_DELAY, @@ -1844,6 +1946,9 @@ xfs_bmap_add_extent_hole_delay( XFS_DATA_FORK); xfs_iext_remove(ifp, idx, 1); ip->i_df.if_lastex = idx - 1; + /* DELTA: Two in-core extents were replaced by one. */ + temp2 = temp; + temp = left.br_startoff; break; case MASK(LEFT_CONTIG): @@ -1864,6 +1969,9 @@ xfs_bmap_add_extent_hole_delay( xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; + /* DELTA: One in-core extent grew into a hole. */ + temp2 = temp; + temp = left.br_startoff; break; case MASK(RIGHT_CONTIG): @@ -1881,6 +1989,9 @@ xfs_bmap_add_extent_hole_delay( NULLSTARTBLOCK((int)newlen), temp, right.br_state); xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx; + /* DELTA: One in-core extent grew into a hole. */ + temp2 = temp; + temp = new->br_startoff; break; case 0: @@ -1894,6 +2005,9 @@ xfs_bmap_add_extent_hole_delay( XFS_DATA_FORK); xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; + /* DELTA: A new in-core extent was added in a hole. */ + temp2 = new->br_blockcount; + temp = new->br_startoff; break; } if (oldlen != newlen) { @@ -1904,6 +2018,13 @@ xfs_bmap_add_extent_hole_delay( * Nothing to do for disk quota accounting here. */ } + if (delta) { + temp2 += temp; + if (delta->xed_startoff > temp) + delta->xed_startoff = temp; + if (delta->xed_blockcount < temp2) + delta->xed_blockcount = temp2; + } *logflagsp = 0; return 0; #undef MASK @@ -1925,6 +2046,7 @@ xfs_bmap_add_extent_hole_real( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork) /* data or attr fork */ { xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */ @@ -1936,7 +2058,10 @@ xfs_bmap_add_extent_hole_real( xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_bmbt_irec_t right; /* right neighbor extent entry */ + int rval=0; /* return value (logging flags) */ int state; /* state bits, accessed thru macros */ + xfs_filblks_t temp=0; + xfs_filblks_t temp2=0; enum { /* bit number definitions for state */ LEFT_CONTIG, RIGHT_CONTIG, LEFT_DELAY, RIGHT_DELAY, @@ -1993,6 +2118,7 @@ xfs_bmap_add_extent_hole_real( left.br_blockcount + new->br_blockcount + right.br_blockcount <= MAXEXTLEN)); + error = 0; /* * Select which case we're in here, and implement it. */ @@ -2018,25 +2144,35 @@ xfs_bmap_add_extent_hole_real( XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) { - *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); - return 0; + rval = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); + } else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, + right.br_startoff, + right.br_startblock, + right.br_blockcount, &i))) + goto done; + ASSERT(i == 1); + if ((error = xfs_bmbt_delete(cur, &i))) + goto done; + ASSERT(i == 1); + if ((error = xfs_bmbt_decrement(cur, 0, &i))) + goto done; + ASSERT(i == 1); + if ((error = xfs_bmbt_update(cur, left.br_startoff, + left.br_startblock, + left.br_blockcount + + new->br_blockcount + + right.br_blockcount, + left.br_state))) + goto done; } - *logflagsp = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff, - right.br_startblock, right.br_blockcount, &i))) - return error; - ASSERT(i == 1); - if ((error = xfs_bmbt_delete(cur, &i))) - return error; - ASSERT(i == 1); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) - return error; - ASSERT(i == 1); - error = xfs_bmbt_update(cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + new->br_blockcount + - right.br_blockcount, left.br_state); - return error; + /* DELTA: Two in-core extents were replaced by one. */ + temp = left.br_startoff; + temp2 = left.br_blockcount + + new->br_blockcount + + right.br_blockcount; + break; case MASK(LEFT_CONTIG): /* @@ -2050,19 +2186,27 @@ xfs_bmap_add_extent_hole_real( xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork); ifp->if_lastex = idx - 1; if (cur == NULL) { - *logflagsp = XFS_ILOG_FEXT(whichfork); - return 0; + rval = XFS_ILOG_FEXT(whichfork); + } else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, + left.br_startoff, + left.br_startblock, + left.br_blockcount, &i))) + goto done; + ASSERT(i == 1); + if ((error = xfs_bmbt_update(cur, left.br_startoff, + left.br_startblock, + left.br_blockcount + + new->br_blockcount, + left.br_state))) + goto done; } - *logflagsp = 0; - if ((error = xfs_bmbt_lookup_eq(cur, left.br_startoff, - left.br_startblock, left.br_blockcount, &i))) - return error; - ASSERT(i == 1); - error = xfs_bmbt_update(cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + new->br_blockcount, - left.br_state); - return error; + /* DELTA: One in-core extent grew. */ + temp = left.br_startoff; + temp2 = left.br_blockcount + + new->br_blockcount; + break; case MASK(RIGHT_CONTIG): /* @@ -2077,19 +2221,27 @@ xfs_bmap_add_extent_hole_real( xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork); ifp->if_lastex = idx; if (cur == NULL) { - *logflagsp = XFS_ILOG_FEXT(whichfork); - return 0; + rval = XFS_ILOG_FEXT(whichfork); + } else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, + right.br_startoff, + right.br_startblock, + right.br_blockcount, &i))) + goto done; + ASSERT(i == 1); + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + + right.br_blockcount, + right.br_state))) + goto done; } - *logflagsp = 0; - if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff, - right.br_startblock, right.br_blockcount, &i))) - return error; - ASSERT(i == 1); - error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + right.br_blockcount, - right.br_state); - return error; + /* DELTA: One in-core extent grew. */ + temp = new->br_startoff; + temp2 = new->br_blockcount + + right.br_blockcount; + break; case 0: /* @@ -2104,29 +2256,41 @@ xfs_bmap_add_extent_hole_real( XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) { - *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); - return 0; + rval = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); + } else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, + new->br_startoff, + new->br_startblock, + new->br_blockcount, &i))) + goto done; + ASSERT(i == 0); + cur->bc_rec.b.br_state = new->br_state; + if ((error = xfs_bmbt_insert(cur, &i))) + goto done; + ASSERT(i == 1); } - *logflagsp = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, &i))) - return error; - ASSERT(i == 0); - cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_bmbt_insert(cur, &i))) - return error; - ASSERT(i == 1); - return 0; + /* DELTA: A new extent was added in a hole. */ + temp = new->br_startoff; + temp2 = new->br_blockcount; + break; + } + if (delta) { + temp2 += temp; + if (delta->xed_startoff > temp) + delta->xed_startoff = temp; + if (delta->xed_blockcount < temp2) + delta->xed_blockcount = temp2; } +done: + *logflagsp = rval; + return error; #undef MASK #undef MASK2 #undef STATE_SET #undef STATE_TEST #undef STATE_SET_TEST #undef SWITCH_STATE - /* NOTREACHED */ - ASSERT(0); - return 0; /* keep gcc quite */ } /* @@ -2598,6 +2762,7 @@ xfs_bmap_btalloc( args.mp = mp; args.fsbno = ap->rval; args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); + args.firstblock = ap->firstblock; blen = 0; if (nullfb) { args.type = XFS_ALLOCTYPE_START_BNO; @@ -2657,7 +2822,7 @@ xfs_bmap_btalloc( else args.minlen = ap->alen; } else if (ap->low) { - args.type = XFS_ALLOCTYPE_FIRST_AG; + args.type = XFS_ALLOCTYPE_START_BNO; args.total = args.minlen = ap->minlen; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -2669,7 +2834,7 @@ xfs_bmap_btalloc( args.prod = ap->ip->i_d.di_extsize; if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) { + } else if (mp->m_sb.sb_blocksize >= NBPP) { args.prod = 1; args.mod = 0; } else { @@ -2885,6 +3050,7 @@ xfs_bmap_del_extent( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ + xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd) /* OK to allocate reserved blocks */ { @@ -3193,6 +3359,14 @@ xfs_bmap_del_extent( if (da_old > da_new) xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new), rsvd); + if (delta) { + /* DELTA: report the original extent. */ + if (delta->xed_startoff > got.br_startoff) + delta->xed_startoff = got.br_startoff; + if (delta->xed_blockcount < got.br_startoff+got.br_blockcount) + delta->xed_blockcount = got.br_startoff + + got.br_blockcount; + } done: *logflagsp = flags; return error; @@ -3279,6 +3453,7 @@ xfs_bmap_extents_to_btree( XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); args.tp = tp; args.mp = mp; + args.firstblock = *firstblock; if (*firstblock == NULLFSBLOCK) { args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); @@ -3414,6 +3589,7 @@ xfs_bmap_local_to_extents( args.tp = tp; args.mp = ip->i_mount; + args.firstblock = *firstblock; ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); /* @@ -3753,7 +3929,7 @@ xfs_bunmap_trace( if (ip->i_rwtrace == NULL) return; ktrace_enter(ip->i_rwtrace, - (void *)(__psint_t)XFS_BUNMAPI, + (void *)(__psint_t)XFS_BUNMAP, (void *)ip, (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff), (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff), @@ -4087,8 +4263,8 @@ xfs_bmap_finish( if (!XFS_FORCED_SHUTDOWN(mp)) xfs_force_shutdown(mp, (error == EFSCORRUPTED) ? - XFS_CORRUPT_INCORE : - XFS_METADATA_IO_ERROR); + SHUTDOWN_CORRUPT_INCORE : + SHUTDOWN_META_IO_ERROR); return error; } xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock, @@ -4538,7 +4714,8 @@ xfs_bmapi( xfs_extlen_t total, /* total blocks needed */ xfs_bmbt_irec_t *mval, /* output: map values */ int *nmap, /* i/o: mval size/count */ - xfs_bmap_free_t *flist) /* i/o: list extents to free */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + xfs_extdelta_t *delta) /* o: change made to incore extents */ { xfs_fsblock_t abno; /* allocated block number */ xfs_extlen_t alen; /* allocated extent length */ @@ -4650,6 +4827,10 @@ xfs_bmapi( end = bno + len; obno = bno; bma.ip = NULL; + if (delta) { + delta->xed_startoff = NULLFILEOFF; + delta->xed_blockcount = 0; + } while (bno < end && n < *nmap) { /* * Reading past eof, act as though there's a hole @@ -4886,8 +5067,8 @@ xfs_bmapi( got.br_state = XFS_EXT_UNWRITTEN; } error = xfs_bmap_add_extent(ip, lastx, &cur, &got, - firstblock, flist, &tmp_logflags, whichfork, - (flags & XFS_BMAPI_RSVBLOCKS)); + firstblock, flist, &tmp_logflags, delta, + whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); logflags |= tmp_logflags; if (error) goto error0; @@ -4983,8 +5164,8 @@ xfs_bmapi( } mval->br_state = XFS_EXT_NORM; error = xfs_bmap_add_extent(ip, lastx, &cur, mval, - firstblock, flist, &tmp_logflags, whichfork, - (flags & XFS_BMAPI_RSVBLOCKS)); + firstblock, flist, &tmp_logflags, delta, + whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); logflags |= tmp_logflags; if (error) goto error0; @@ -5073,7 +5254,14 @@ xfs_bmapi( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); error = 0; - + if (delta && delta->xed_startoff != NULLFILEOFF) { + /* A change was actually made. + * Note that delta->xed_blockount is an offset at this + * point and needs to be converted to a block count. + */ + ASSERT(delta->xed_blockcount > delta->xed_startoff); + delta->xed_blockcount -= delta->xed_startoff; + } error0: /* * Log everything. Do this after conversion, there's no point in @@ -5185,6 +5373,8 @@ xfs_bunmapi( xfs_fsblock_t *firstblock, /* first allocated block controls a.g. for allocs */ xfs_bmap_free_t *flist, /* i/o: list extents to free */ + xfs_extdelta_t *delta, /* o: change made to incore + extents */ int *done) /* set if not done yet */ { xfs_btree_cur_t *cur; /* bmap btree cursor */ @@ -5242,6 +5432,10 @@ xfs_bunmapi( bno = start + len - 1; ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); + if (delta) { + delta->xed_startoff = NULLFILEOFF; + delta->xed_blockcount = 0; + } /* * Check to see if the given block number is past the end of the * file, back up to the last block if so... @@ -5340,7 +5534,8 @@ xfs_bunmapi( } del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx, &cur, &del, - firstblock, flist, &logflags, XFS_DATA_FORK, 0); + firstblock, flist, &logflags, delta, + XFS_DATA_FORK, 0); if (error) goto error0; goto nodelete; @@ -5394,7 +5589,7 @@ xfs_bunmapi( prev.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx - 1, &cur, &prev, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + delta, XFS_DATA_FORK, 0); if (error) goto error0; goto nodelete; @@ -5403,7 +5598,7 @@ xfs_bunmapi( del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx, &cur, &del, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + delta, XFS_DATA_FORK, 0); if (error) goto error0; goto nodelete; @@ -5456,7 +5651,7 @@ xfs_bunmapi( goto error0; } error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, - &tmp_logflags, whichfork, rsvd); + &tmp_logflags, delta, whichfork, rsvd); logflags |= tmp_logflags; if (error) goto error0; @@ -5513,6 +5708,14 @@ nodelete: ASSERT(ifp->if_ext_max == XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); error = 0; + if (delta && delta->xed_startoff != NULLFILEOFF) { + /* A change was actually made. + * Note that delta->xed_blockount is an offset at this + * point and needs to be converted to a block count. + */ + ASSERT(delta->xed_blockcount > delta->xed_startoff); + delta->xed_blockcount -= delta->xed_startoff; + } error0: /* * Log everything. Do this after conversion, there's no point in @@ -5556,7 +5759,7 @@ xfs_getbmap( __int64_t fixlen; /* length for -1 case */ int i; /* extent number */ xfs_inode_t *ip; /* xfs incore inode pointer */ - vnode_t *vp; /* corresponding vnode */ + bhv_vnode_t *vp; /* corresponding vnode */ int lock; /* lock state */ xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ @@ -5653,7 +5856,7 @@ xfs_getbmap( if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) { /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ - VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); + error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF); } ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); @@ -5689,7 +5892,8 @@ xfs_getbmap( nmap = (nexleft > subnex) ? subnex : nexleft; error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), - bmapi_flags, NULL, 0, map, &nmap, NULL); + bmapi_flags, NULL, 0, map, &nmap, + NULL, NULL); if (error) goto unlock_and_return; ASSERT(nmap <= subnex); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 8e0d73d9ccc4..80e93409b78d 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -26,6 +26,20 @@ struct xfs_mount; struct xfs_trans; /* + * DELTA: describe a change to the in-core extent list. + * + * Internally the use of xed_blockount is somewhat funky. + * xed_blockcount contains an offset much of the time because this + * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are + * the same underlying type). + */ +typedef struct xfs_extdelta +{ + xfs_fileoff_t xed_startoff; /* offset of range */ + xfs_filblks_t xed_blockcount; /* blocks in range */ +} xfs_extdelta_t; + +/* * List of extents to be free "later". * The list is kept sorted on xbf_startblock. */ @@ -275,7 +289,9 @@ xfs_bmapi( xfs_extlen_t total, /* total blocks needed */ struct xfs_bmbt_irec *mval, /* output: map values */ int *nmap, /* i/o: mval size/count */ - xfs_bmap_free_t *flist); /* i/o: list extents to free */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + xfs_extdelta_t *delta); /* o: change made to incore + extents */ /* * Map file blocks to filesystem blocks, simple version. @@ -309,6 +325,8 @@ xfs_bunmapi( xfs_fsblock_t *firstblock, /* first allocated block controls a.g. for allocs */ xfs_bmap_free_t *flist, /* i/o: list extents to free */ + xfs_extdelta_t *delta, /* o: change made to incore + extents */ int *done); /* set if not done yet */ /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bea44709afbe..18fb7385d719 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -1569,12 +1567,11 @@ xfs_bmbt_split( lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); left = XFS_BUF_TO_BMBT_BLOCK(lbp); args.fsbno = cur->bc_private.b.firstblock; + args.firstblock = args.fsbno; if (args.fsbno == NULLFSBLOCK) { args.fsbno = lbno; args.type = XFS_ALLOCTYPE_START_BNO; - } else if (cur->bc_private.b.flist->xbf_low) - args.type = XFS_ALLOCTYPE_FIRST_AG; - else + } else args.type = XFS_ALLOCTYPE_NEAR_BNO; args.mod = args.minleft = args.alignment = args.total = args.isfl = args.userdata = args.minalignslop = 0; @@ -2356,6 +2353,7 @@ xfs_bmbt_newroot( args.userdata = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; + args.firstblock = args.fsbno; if (args.fsbno == NULLFSBLOCK) { #ifdef DEBUG if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { @@ -2365,9 +2363,7 @@ xfs_bmbt_newroot( #endif args.fsbno = INT_GET(*pp, ARCH_CONVERT); args.type = XFS_ALLOCTYPE_START_BNO; - } else if (args.wasdel) - args.type = XFS_ALLOCTYPE_FIRST_AG; - else + } else args.type = XFS_ALLOCTYPE_NEAR_BNO; if ((error = xfs_alloc_vextent(&args))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 52d5d095fc35..ee2255bd6562 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 5fed15682dda..a4aa53974f76 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -23,7 +23,6 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_buf_item.h" @@ -1030,9 +1029,9 @@ xfs_buf_iodone_callbacks( if ((XFS_BUF_TARGET(bp) != lasttarg) || (time_after(jiffies, (lasttime + 5*HZ)))) { lasttime = jiffies; - prdev("XFS write error in file system meta-data " - "block 0x%llx in %s", - XFS_BUF_TARGET(bp), + cmn_err(CE_ALERT, "Device %s, XFS metadata write error" + " block 0x%llx in %s", + XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); } lasttarg = XFS_BUF_TARGET(bp); @@ -1108,7 +1107,7 @@ xfs_buf_error_relse( XFS_BUF_ERROR(bp,0); xfs_buftrace("BUF_ERROR_RELSE", bp); if (! XFS_FORCED_SHUTDOWN(mp)) - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); /* * We have to unpin the pinned buffers so do the * callbacks. diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h index d0035c6e9514..7a0e482dd436 100644 --- a/fs/xfs/xfs_cap.h +++ b/fs/xfs/xfs_cap.h @@ -49,12 +49,12 @@ typedef struct xfs_cap_set { #include <linux/posix_cap_xattr.h> -struct vnode; +struct bhv_vnode; -extern int xfs_cap_vhascap(struct vnode *); -extern int xfs_cap_vset(struct vnode *, void *, size_t); -extern int xfs_cap_vget(struct vnode *, void *, size_t); -extern int xfs_cap_vremove(struct vnode *vp); +extern int xfs_cap_vhascap(struct bhv_vnode *); +extern int xfs_cap_vset(struct bhv_vnode *, void *, size_t); +extern int xfs_cap_vget(struct bhv_vnode *, void *, size_t); +extern int xfs_cap_vremove(struct bhv_vnode *); #define _CAP_EXISTS xfs_cap_vhascap diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 8988b9051175..32ab61d17ace 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -43,7 +41,6 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "xfs_dir_leaf.h" #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" @@ -159,7 +156,7 @@ xfs_da_split(xfs_da_state_t *state) max = state->path.active - 1; ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || - state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp)); + state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); addblk = &state->path.blk[max]; /* initial dummy value */ for (i = max; (i >= 0) && addblk; state->path.active--, i--) { @@ -199,38 +196,7 @@ xfs_da_split(xfs_da_state_t *state) return(error); /* GROT: attr inconsistent */ addblk = newblk; break; - case XFS_DIR_LEAF_MAGIC: - ASSERT(XFS_DIR_IS_V1(state->mp)); - error = xfs_dir_leaf_split(state, oldblk, newblk); - if ((error != 0) && (error != ENOSPC)) { - return(error); /* GROT: dir is inconsistent */ - } - if (!error) { - addblk = newblk; - break; - } - /* - * Entry wouldn't fit, split the leaf again. - */ - state->extravalid = 1; - if (state->inleaf) { - state->extraafter = 0; /* before newblk */ - error = xfs_dir_leaf_split(state, oldblk, - &state->extrablk); - if (error) - return(error); /* GROT: dir incon. */ - addblk = newblk; - } else { - state->extraafter = 1; /* after newblk */ - error = xfs_dir_leaf_split(state, newblk, - &state->extrablk); - if (error) - return(error); /* GROT: dir incon. */ - addblk = newblk; - } - break; case XFS_DIR2_LEAFN_MAGIC: - ASSERT(XFS_DIR_IS_V2(state->mp)); error = xfs_dir2_leafn_split(state, oldblk, newblk); if (error) return error; @@ -363,7 +329,6 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); } else { - ASSERT(XFS_DIR_IS_V2(mp)); ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)oldroot; size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - @@ -379,8 +344,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Set up the new root node. */ error = xfs_da_node_create(args, - args->whichfork == XFS_DATA_FORK && - XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0, + (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0, be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); if (error) return(error); @@ -427,10 +391,9 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); /* - * With V2 the extra block is data or freespace. + * With V2 dirs the extra block is data or freespace. */ - useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) || - state->args->whichfork == XFS_ATTR_FORK); + useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK; newcount = 1 + useextra; /* * Do we have to split the node? @@ -624,7 +587,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); - if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + if (state->args->whichfork == XFS_DATA_FORK) ASSERT(newblk->blkno >= mp->m_dirleafblk && newblk->blkno < mp->m_dirfreeblk); @@ -670,7 +633,7 @@ xfs_da_join(xfs_da_state_t *state) save_blk = &state->altpath.blk[ state->path.active-1 ]; ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || - drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp)); + drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); /* * Walk back up the tree joining/deallocating as necessary. @@ -693,17 +656,7 @@ xfs_da_join(xfs_da_state_t *state) return(0); xfs_attr_leaf_unbalance(state, drop_blk, save_blk); break; - case XFS_DIR_LEAF_MAGIC: - ASSERT(XFS_DIR_IS_V1(state->mp)); - error = xfs_dir_leaf_toosmall(state, &action); - if (error) - return(error); - if (action == 0) - return(0); - xfs_dir_leaf_unbalance(state, drop_blk, save_blk); - break; case XFS_DIR2_LEAFN_MAGIC: - ASSERT(XFS_DIR_IS_V2(state->mp)); error = xfs_dir2_leafn_toosmall(state, &action); if (error) return error; @@ -790,7 +743,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(bp != NULL); blkinfo = bp->data; if (be16_to_cpu(oldroot->hdr.level) == 1) { - ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); } else { ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); @@ -951,14 +904,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) if (count == 0) return; break; - case XFS_DIR_LEAF_MAGIC: - ASSERT(XFS_DIR_IS_V1(state->mp)); - lasthash = xfs_dir_leaf_lasthash(blk->bp, &count); - if (count == 0) - return; - break; case XFS_DIR2_LEAFN_MAGIC: - ASSERT(XFS_DIR_IS_V2(state->mp)); lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count); if (count == 0) return; @@ -1117,10 +1063,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) * Descend thru the B-tree searching each level for the right * node to use, until the right hashval is found. */ - if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp)) - blkno = state->mp->m_dirleafblk; - else - blkno = 0; + blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0; for (blk = &state->path.blk[0], state->path.active = 1; state->path.active <= XFS_DA_NODE_MAXDEPTH; blk++, state->path.active++) { @@ -1137,7 +1080,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) } curr = blk->bp->data; ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC || - be16_to_cpu(curr->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || + be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC || be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC); /* @@ -1190,16 +1133,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) blk->index = probe; blkno = be32_to_cpu(btree->before); } - } - else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) { + } else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) { blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; - } - else if (be16_to_cpu(curr->magic) == XFS_DIR_LEAF_MAGIC) { - blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL); - break; - } - else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) { + } else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) { blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); break; } @@ -1212,12 +1149,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) * next leaf and keep searching. */ for (;;) { - if (blk->magic == XFS_DIR_LEAF_MAGIC) { - ASSERT(XFS_DIR_IS_V1(state->mp)); - retval = xfs_dir_leaf_lookup_int(blk->bp, args, - &blk->index); - } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { - ASSERT(XFS_DIR_IS_V2(state->mp)); + if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { retval = xfs_dir2_leafn_lookup_int(blk->bp, args, &blk->index, state); } @@ -1270,7 +1202,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, old_info = old_blk->bp->data; new_info = new_blk->bp->data; ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || - old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || + old_blk->magic == XFS_DIR2_LEAFN_MAGIC || old_blk->magic == XFS_ATTR_LEAF_MAGIC); ASSERT(old_blk->magic == be16_to_cpu(old_info->magic)); ASSERT(new_blk->magic == be16_to_cpu(new_info->magic)); @@ -1280,12 +1212,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, case XFS_ATTR_LEAF_MAGIC: before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); break; - case XFS_DIR_LEAF_MAGIC: - ASSERT(XFS_DIR_IS_V1(state->mp)); - before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp); - break; case XFS_DIR2_LEAFN_MAGIC: - ASSERT(XFS_DIR_IS_V2(state->mp)); before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); break; case XFS_DA_NODE_MAGIC: @@ -1404,7 +1331,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, save_info = save_blk->bp->data; drop_info = drop_blk->bp->data; ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || - save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || + save_blk->magic == XFS_DIR2_LEAFN_MAGIC || save_blk->magic == XFS_ATTR_LEAF_MAGIC); ASSERT(save_blk->magic == be16_to_cpu(save_info->magic)); ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic)); @@ -1529,7 +1456,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, ASSERT(blk->bp != NULL); info = blk->bp->data; ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || - be16_to_cpu(info->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || + be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { @@ -1548,20 +1475,13 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; - case XFS_DIR_LEAF_MAGIC: - ASSERT(XFS_DIR_IS_V1(state->mp)); - blk->hashval = xfs_dir_leaf_lasthash(blk->bp, - NULL); - break; case XFS_DIR2_LEAFN_MAGIC: - ASSERT(XFS_DIR_IS_V2(state->mp)); blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); break; default: ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC || - blk->magic == - XFS_DIRX_LEAF_MAGIC(state->mp)); + blk->magic == XFS_DIR2_LEAFN_MAGIC); break; } } @@ -1620,7 +1540,6 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) xfs_bmbt_irec_t *mapp; xfs_inode_t *dp; int nmap, error, w, count, c, got, i, mapi; - xfs_fsize_t size; xfs_trans_t *tp; xfs_mount_t *mp; @@ -1631,7 +1550,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) /* * For new directories adjust the file offset and block count. */ - if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) { + if (w == XFS_DATA_FORK) { bno = mp->m_dirleafblk; count = mp->m_dirblkfsbs; } else { @@ -1641,10 +1560,9 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) /* * Find a spot in the file space to put the new block. */ - if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) { + if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) return error; - } - if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + if (w == XFS_DATA_FORK) ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); /* * Try mapping it in one filesystem block. @@ -1655,7 +1573,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist))) { + args->flist, NULL))) { return error; } ASSERT(nmap <= 1); @@ -1676,7 +1594,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE| XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist))) { + &mapp[mapi], &nmap, args->flist, + NULL))) { kmem_free(mapp, sizeof(*mapp) * count); return error; } @@ -1705,19 +1624,6 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) if (mapp != &map) kmem_free(mapp, sizeof(*mapp) * count); *new_blkno = (xfs_dablk_t)bno; - /* - * For version 1 directories, adjust the file size if it changed. - */ - if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) { - ASSERT(mapi == 1); - if ((error = xfs_bmap_last_offset(tp, dp, &bno, w))) - return error; - size = XFS_FSB_TO_B(mp, bno); - if (size != dp->i_d.di_size) { - dp->i_d.di_size = size; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - } - } return 0; } @@ -1742,7 +1648,6 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, int error, w, entno, level, dead_level; xfs_da_blkinfo_t *dead_info, *sib_info; xfs_da_intnode_t *par_node, *dead_node; - xfs_dir_leafblock_t *dead_leaf; xfs_dir2_leaf_t *dead_leaf2; xfs_dahash_t dead_hash; @@ -1753,11 +1658,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, w = args->whichfork; ASSERT(w == XFS_DATA_FORK); mp = ip->i_mount; - if (XFS_DIR_IS_V2(mp)) { - lastoff = mp->m_dirfreeblk; - error = xfs_bmap_last_before(tp, ip, &lastoff, w); - } else - error = xfs_bmap_last_offset(tp, ip, &lastoff, w); + lastoff = mp->m_dirfreeblk; + error = xfs_bmap_last_before(tp, ip, &lastoff, w); if (error) return error; if (unlikely(lastoff == 0)) { @@ -1780,14 +1682,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Get values from the moved block. */ - if (be16_to_cpu(dead_info->magic) == XFS_DIR_LEAF_MAGIC) { - ASSERT(XFS_DIR_IS_V1(mp)); - dead_leaf = (xfs_dir_leafblock_t *)dead_info; - dead_level = 0; - dead_hash = - INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); - } else if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { - ASSERT(XFS_DIR_IS_V2(mp)); + if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); @@ -1842,7 +1737,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, xfs_da_buf_done(sib_buf); sib_buf = NULL; } - par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk; + par_blkno = mp->m_dirleafblk; level = -1; /* * Walk down the tree looking for the parent of the moved block. @@ -1941,8 +1836,6 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, { xfs_inode_t *dp; int done, error, w, count; - xfs_fileoff_t bno; - xfs_fsize_t size; xfs_trans_t *tp; xfs_mount_t *mp; @@ -1950,7 +1843,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, w = args->whichfork; tp = args->trans; mp = dp->i_mount; - if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + if (w == XFS_DATA_FORK) count = mp->m_dirblkfsbs; else count = 1; @@ -1961,34 +1854,17 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, */ if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA, - 0, args->firstblock, args->flist, + 0, args->firstblock, args->flist, NULL, &done)) == ENOSPC) { if (w != XFS_DATA_FORK) - goto done; + break; if ((error = xfs_da_swap_lastblock(args, &dead_blkno, &dead_buf))) - goto done; - } else if (error) - goto done; - else + break; + } else { break; - } - ASSERT(done); - xfs_da_binval(tp, dead_buf); - /* - * Adjust the directory size for version 1. - */ - if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) { - if ((error = xfs_bmap_last_offset(tp, dp, &bno, w))) - return error; - size = XFS_FSB_TO_B(dp->i_mount, bno); - if (size != dp->i_d.di_size) { - dp->i_d.di_size = size; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } } - return 0; -done: xfs_da_binval(tp, dead_buf); return error; } @@ -2049,10 +1925,7 @@ xfs_da_do_buf( xfs_dabuf_t *rbp; mp = dp->i_mount; - if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) - nfsb = mp->m_dirblkfsbs; - else - nfsb = 1; + nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; mappedbno = *mappedbnop; /* * Caller doesn't have a mapping. -2 means don't complain @@ -2086,7 +1959,7 @@ xfs_da_do_buf( nfsb, XFS_BMAPI_METADATA | XFS_BMAPI_AFLAG(whichfork), - NULL, 0, mapp, &nmap, NULL))) + NULL, 0, mapp, &nmap, NULL, NULL))) goto exit0; } } else { @@ -2198,7 +2071,6 @@ xfs_da_do_buf( magic1 = be32_to_cpu(data->hdr.magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && - (magic != XFS_DIR_LEAF_MAGIC) && (magic != XFS_ATTR_LEAF_MAGIC) && (magic != XFS_DIR2_LEAF1_MAGIC) && (magic != XFS_DIR2_LEAFN_MAGIC) && diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 243a730d5ec8..4ab865ec8b82 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -36,14 +36,10 @@ struct zone; * level in the Btree, and to identify which type of block this is. */ #define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ -#define XFS_DIR_LEAF_MAGIC 0xfeeb /* magic number: directory leaf blks */ #define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ #define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ #define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ -#define XFS_DIRX_LEAF_MAGIC(mp) \ - (XFS_DIR_IS_V1(mp) ? XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC) - typedef struct xfs_da_blkinfo { __be32 forw; /* previous block in list */ __be32 back; /* following block in list */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 4968a6358e61..80562b60fb95 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -54,24 +52,14 @@ xfs_swapext( xfs_swapext_t __user *sxu) { xfs_swapext_t *sxp; - xfs_inode_t *ip=NULL, *tip=NULL, *ips[2]; - xfs_trans_t *tp; + xfs_inode_t *ip=NULL, *tip=NULL; xfs_mount_t *mp; - xfs_bstat_t *sbp; struct file *fp = NULL, *tfp = NULL; - vnode_t *vp, *tvp; - static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; - int ilf_fields, tilf_fields; + bhv_vnode_t *vp, *tvp; int error = 0; - xfs_ifork_t *tempifp, *ifp, *tifp; - __uint64_t tmp; - int aforkblks = 0; - int taforkblks = 0; - char locked = 0; sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); - tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); - if (!sxp || !tempifp) { + if (!sxp) { error = XFS_ERROR(ENOMEM); goto error0; } @@ -118,14 +106,56 @@ xfs_swapext( mp = ip->i_mount; - sbp = &sxp->sx_stat; - if (XFS_FORCED_SHUTDOWN(mp)) { error = XFS_ERROR(EIO); goto error0; } - locked = 1; + error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp); + + error0: + if (fp != NULL) + fput(fp); + if (tfp != NULL) + fput(tfp); + + if (sxp != NULL) + kmem_free(sxp, sizeof(xfs_swapext_t)); + + return error; +} + +int +xfs_swap_extents( + xfs_inode_t *ip, + xfs_inode_t *tip, + xfs_swapext_t *sxp) +{ + xfs_mount_t *mp; + xfs_inode_t *ips[2]; + xfs_trans_t *tp; + xfs_bstat_t *sbp = &sxp->sx_stat; + bhv_vnode_t *vp, *tvp; + xfs_ifork_t *tempifp, *ifp, *tifp; + int ilf_fields, tilf_fields; + static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; + int error = 0; + int aforkblks = 0; + int taforkblks = 0; + __uint64_t tmp; + char locked = 0; + + mp = ip->i_mount; + + tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); + if (!tempifp) { + error = XFS_ERROR(ENOMEM); + goto error0; + } + + sbp = &sxp->sx_stat; + vp = XFS_ITOV(ip); + tvp = XFS_ITOV(tip); /* Lock in i_ino order */ if (ip->i_ino < tip->i_ino) { @@ -137,6 +167,7 @@ xfs_swapext( } xfs_lock_inodes(ips, 2, 0, lock_flags); + locked = 1; /* Check permissions */ error = xfs_iaccess(ip, S_IWUSR, NULL); @@ -169,7 +200,7 @@ xfs_swapext( if (VN_CACHED(tvp) != 0) { xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); - VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED); + bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED); } /* Verify O_DIRECT for ftmp */ @@ -214,7 +245,7 @@ xfs_swapext( /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call - * VOP_READ (or write in the case of autogrow) they block on the iolock + * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(vp)) { @@ -233,7 +264,7 @@ xfs_swapext( * fields change. */ - VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); + bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); if ((error = xfs_trans_reserve(tp, 0, @@ -360,16 +391,7 @@ xfs_swapext( xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); } - - if (fp != NULL) - fput(fp); - if (tfp != NULL) - fput(tfp); - - if (sxp != NULL) - kmem_free(sxp, sizeof(xfs_swapext_t)); if (tempifp != NULL) kmem_free(tempifp, sizeof(xfs_ifork_t)); - return error; } diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h index f678559abc45..da178205be68 100644 --- a/fs/xfs/xfs_dfrag.h +++ b/fs/xfs/xfs_dfrag.h @@ -48,6 +48,9 @@ typedef struct xfs_swapext */ int xfs_swapext(struct xfs_swapext __user *sx); +int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, + struct xfs_swapext *sxp); + #endif /* __KERNEL__ */ #endif /* __XFS_DFRAG_H__ */ diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 79d0d9e1fbab..b33826961c45 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -85,7 +85,6 @@ typedef struct xfs_dinode union { xfs_bmdr_block_t di_bmbt; /* btree root block */ xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */ - xfs_dir_shortform_t di_dirsf; /* shortform directory */ xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */ char di_c[1]; /* local contents */ xfs_dev_t di_dev; /* device for S_IFCHR/S_IFBLK */ @@ -257,6 +256,7 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ +#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -270,12 +270,13 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) +#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ - XFS_DIFLAG_EXTSZINHERIT) + XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) #endif /* __XFS_DINODE_H__ */ diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c deleted file mode 100644 index 9cc702a839a3..000000000000 --- a/fs/xfs/xfs_dir.c +++ /dev/null @@ -1,1217 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_dir.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_dir_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_dir_leaf.h" -#include "xfs_error.h" - -/* - * xfs_dir.c - * - * Provide the external interfaces to manage directories. - */ - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -/* - * Functions for the dirops interfaces. - */ -static void xfs_dir_mount(struct xfs_mount *mp); - -static int xfs_dir_isempty(struct xfs_inode *dp); - -static int xfs_dir_init(struct xfs_trans *trans, - struct xfs_inode *dir, - struct xfs_inode *parent_dir); - -static int xfs_dir_createname(struct xfs_trans *trans, - struct xfs_inode *dp, - char *name_string, - int name_len, - xfs_ino_t inode_number, - xfs_fsblock_t *firstblock, - xfs_bmap_free_t *flist, - xfs_extlen_t total); - -static int xfs_dir_lookup(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name_string, - int name_length, - xfs_ino_t *inode_number); - -static int xfs_dir_removename(struct xfs_trans *trans, - struct xfs_inode *dp, - char *name_string, - int name_length, - xfs_ino_t ino, - xfs_fsblock_t *firstblock, - xfs_bmap_free_t *flist, - xfs_extlen_t total); - -static int xfs_dir_getdents(struct xfs_trans *tp, - struct xfs_inode *dp, - struct uio *uiop, - int *eofp); - -static int xfs_dir_replace(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name_string, - int name_length, - xfs_ino_t inode_number, - xfs_fsblock_t *firstblock, - xfs_bmap_free_t *flist, - xfs_extlen_t total); - -static int xfs_dir_canenter(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name_string, - int name_length); - -static int xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, - xfs_dinode_t *dip); - -xfs_dirops_t xfsv1_dirops = { - .xd_mount = xfs_dir_mount, - .xd_isempty = xfs_dir_isempty, - .xd_init = xfs_dir_init, - .xd_createname = xfs_dir_createname, - .xd_lookup = xfs_dir_lookup, - .xd_removename = xfs_dir_removename, - .xd_getdents = xfs_dir_getdents, - .xd_replace = xfs_dir_replace, - .xd_canenter = xfs_dir_canenter, - .xd_shortform_validate_ondisk = xfs_dir_shortform_validate_ondisk, - .xd_shortform_to_single = xfs_dir_shortform_to_leaf, -}; - -/* - * Internal routines when dirsize == XFS_LBSIZE(mp). - */ -STATIC int xfs_dir_leaf_lookup(xfs_da_args_t *args); -STATIC int xfs_dir_leaf_removename(xfs_da_args_t *args, int *number_entries, - int *total_namebytes); -STATIC int xfs_dir_leaf_getdents(xfs_trans_t *trans, xfs_inode_t *dp, - uio_t *uio, int *eofp, - xfs_dirent_t *dbp, - xfs_dir_put_t put); -STATIC int xfs_dir_leaf_replace(xfs_da_args_t *args); - -/* - * Internal routines when dirsize > XFS_LBSIZE(mp). - */ -STATIC int xfs_dir_node_addname(xfs_da_args_t *args); -STATIC int xfs_dir_node_lookup(xfs_da_args_t *args); -STATIC int xfs_dir_node_removename(xfs_da_args_t *args); -STATIC int xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, - uio_t *uio, int *eofp, - xfs_dirent_t *dbp, - xfs_dir_put_t put); -STATIC int xfs_dir_node_replace(xfs_da_args_t *args); - -#if defined(XFS_DIR_TRACE) -ktrace_t *xfs_dir_trace_buf; -#endif - - -/*======================================================================== - * Overall external interface routines. - *========================================================================*/ - -xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; - -/* - * One-time startup routine called from xfs_init(). - */ -void -xfs_dir_startup(void) -{ - xfs_dir_hash_dot = xfs_da_hashname(".", 1); - xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); -} - -/* - * Initialize directory-related fields in the mount structure. - */ -static void -xfs_dir_mount(xfs_mount_t *mp) -{ - uint shortcount, leafcount, count; - - mp->m_dirversion = 1; - if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { - shortcount = (mp->m_attroffset - - (uint)sizeof(xfs_dir_sf_hdr_t)) / - (uint)sizeof(xfs_dir_sf_entry_t); - leafcount = (XFS_LBSIZE(mp) - - (uint)sizeof(xfs_dir_leaf_hdr_t)) / - ((uint)sizeof(xfs_dir_leaf_entry_t) + - (uint)sizeof(xfs_dir_leaf_name_t)); - } else { - shortcount = (XFS_BMDR_SPACE_CALC(MINABTPTRS) - - (uint)sizeof(xfs_dir_sf_hdr_t)) / - (uint)sizeof(xfs_dir_sf_entry_t); - leafcount = (XFS_LBSIZE(mp) - - (uint)sizeof(xfs_dir_leaf_hdr_t)) / - ((uint)sizeof(xfs_dir_leaf_entry_t) + - (uint)sizeof(xfs_dir_leaf_name_t)); - } - count = shortcount > leafcount ? shortcount : leafcount; - mp->m_dircook_elog = xfs_da_log2_roundup(count + 1); - ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog); - mp->m_dir_node_ents = mp->m_attr_node_ents = - (XFS_LBSIZE(mp) - (uint)sizeof(xfs_da_node_hdr_t)) / - (uint)sizeof(xfs_da_node_entry_t); - mp->m_dir_magicpct = (XFS_LBSIZE(mp) * 37) / 100; - mp->m_dirblksize = mp->m_sb.sb_blocksize; - mp->m_dirblkfsbs = 1; -} - -/* - * Return 1 if directory contains only "." and "..". - */ -static int -xfs_dir_isempty(xfs_inode_t *dp) -{ - xfs_dir_sf_hdr_t *hdr; - - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - if (dp->i_d.di_size == 0) - return(1); - if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) - return(0); - hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data; - return(hdr->count == 0); -} - -/* - * Initialize a directory with its "." and ".." entries. - */ -static int -xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir) -{ - xfs_da_args_t args; - int error; - - memset((char *)&args, 0, sizeof(args)); - args.dp = dir; - args.trans = trans; - - ASSERT((dir->i_d.di_mode & S_IFMT) == S_IFDIR); - if ((error = xfs_dir_ino_validate(trans->t_mountp, parent_dir->i_ino))) - return error; - - return(xfs_dir_shortform_create(&args, parent_dir->i_ino)); -} - -/* - * Generic handler routine to add a name to a directory. - * Transitions directory from shortform to Btree as necessary. - */ -static int /* error */ -xfs_dir_createname(xfs_trans_t *trans, xfs_inode_t *dp, char *name, - int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock, - xfs_bmap_free_t *flist, xfs_extlen_t total) -{ - xfs_da_args_t args; - int retval, newsize, done; - - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - - if ((retval = xfs_dir_ino_validate(trans->t_mountp, inum))) - return (retval); - - XFS_STATS_INC(xs_dir_create); - /* - * Fill in the arg structure for this request. - */ - args.name = name; - args.namelen = namelen; - args.hashval = xfs_da_hashname(name, namelen); - args.inumber = inum; - args.dp = dp; - args.firstblock = firstblock; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = trans; - args.justcheck = 0; - args.addname = args.oknoent = 1; - - /* - * Decide on what work routines to call based on the inode size. - */ - done = 0; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen); - if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp)) { - retval = xfs_dir_shortform_addname(&args); - done = 1; - } else { - if (total == 0) - return XFS_ERROR(ENOSPC); - retval = xfs_dir_shortform_to_leaf(&args); - done = retval != 0; - } - } - if (!done && xfs_bmap_one_block(dp, XFS_DATA_FORK)) { - retval = xfs_dir_leaf_addname(&args); - done = retval != ENOSPC; - if (!done) { - if (total == 0) - return XFS_ERROR(ENOSPC); - retval = xfs_dir_leaf_to_node(&args); - done = retval != 0; - } - } - if (!done) { - retval = xfs_dir_node_addname(&args); - } - return(retval); -} - -/* - * Generic handler routine to check if a name can be added to a directory, - * without adding any blocks to the directory. - */ -static int /* error */ -xfs_dir_canenter(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen) -{ - xfs_da_args_t args; - int retval, newsize; - - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - /* - * Fill in the arg structure for this request. - */ - args.name = name; - args.namelen = namelen; - args.hashval = xfs_da_hashname(name, namelen); - args.inumber = 0; - args.dp = dp; - args.firstblock = NULL; - args.flist = NULL; - args.total = 0; - args.whichfork = XFS_DATA_FORK; - args.trans = trans; - args.justcheck = args.addname = args.oknoent = 1; - - /* - * Decide on what work routines to call based on the inode size. - */ - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen); - if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp)) - retval = 0; - else - retval = XFS_ERROR(ENOSPC); - } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) { - retval = xfs_dir_leaf_addname(&args); - } else { - retval = xfs_dir_node_addname(&args); - } - return(retval); -} - -/* - * Generic handler routine to remove a name from a directory. - * Transitions directory from Btree to shortform as necessary. - */ -static int /* error */ -xfs_dir_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name, - int namelen, xfs_ino_t ino, xfs_fsblock_t *firstblock, - xfs_bmap_free_t *flist, xfs_extlen_t total) -{ - xfs_da_args_t args; - int count, totallen, newsize, retval; - - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - XFS_STATS_INC(xs_dir_remove); - /* - * Fill in the arg structure for this request. - */ - args.name = name; - args.namelen = namelen; - args.hashval = xfs_da_hashname(name, namelen); - args.inumber = ino; - args.dp = dp; - args.firstblock = firstblock; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = trans; - args.justcheck = args.addname = args.oknoent = 0; - - /* - * Decide on what work routines to call based on the inode size. - */ - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - retval = xfs_dir_shortform_removename(&args); - } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) { - retval = xfs_dir_leaf_removename(&args, &count, &totallen); - if (retval == 0) { - newsize = XFS_DIR_SF_ALLFIT(count, totallen); - if (newsize <= XFS_IFORK_DSIZE(dp)) { - retval = xfs_dir_leaf_to_shortform(&args); - } - } - } else { - retval = xfs_dir_node_removename(&args); - } - return(retval); -} - -static int /* error */ -xfs_dir_lookup(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen, - xfs_ino_t *inum) -{ - xfs_da_args_t args; - int retval; - - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - - XFS_STATS_INC(xs_dir_lookup); - /* - * Fill in the arg structure for this request. - */ - args.name = name; - args.namelen = namelen; - args.hashval = xfs_da_hashname(name, namelen); - args.inumber = 0; - args.dp = dp; - args.firstblock = NULL; - args.flist = NULL; - args.total = 0; - args.whichfork = XFS_DATA_FORK; - args.trans = trans; - args.justcheck = args.addname = 0; - args.oknoent = 1; - - /* - * Decide on what work routines to call based on the inode size. - */ - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - retval = xfs_dir_shortform_lookup(&args); - } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) { - retval = xfs_dir_leaf_lookup(&args); - } else { - retval = xfs_dir_node_lookup(&args); - } - if (retval == EEXIST) - retval = 0; - *inum = args.inumber; - return(retval); -} - -/* - * Implement readdir. - */ -static int /* error */ -xfs_dir_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, int *eofp) -{ - xfs_dirent_t *dbp; - int alignment, retval; - xfs_dir_put_t put; - - XFS_STATS_INC(xs_dir_getdents); - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - - /* - * If our caller has given us a single contiguous memory buffer, - * just work directly within that buffer. If it's in user memory, - * lock it down first. - */ - alignment = sizeof(xfs_off_t) - 1; - if ((uio->uio_iovcnt == 1) && - (((__psint_t)uio->uio_iov[0].iov_base & alignment) == 0) && - ((uio->uio_iov[0].iov_len & alignment) == 0)) { - dbp = NULL; - put = xfs_dir_put_dirent64_direct; - } else { - dbp = kmem_alloc(sizeof(*dbp) + MAXNAMELEN, KM_SLEEP); - put = xfs_dir_put_dirent64_uio; - } - - /* - * Decide on what work routines to call based on the inode size. - */ - *eofp = 0; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - retval = xfs_dir_shortform_getdents(dp, uio, eofp, dbp, put); - } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) { - retval = xfs_dir_leaf_getdents(trans, dp, uio, eofp, dbp, put); - } else { - retval = xfs_dir_node_getdents(trans, dp, uio, eofp, dbp, put); - } - if (dbp != NULL) - kmem_free(dbp, sizeof(*dbp) + MAXNAMELEN); - - return(retval); -} - -static int /* error */ -xfs_dir_replace(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen, - xfs_ino_t inum, xfs_fsblock_t *firstblock, - xfs_bmap_free_t *flist, xfs_extlen_t total) -{ - xfs_da_args_t args; - int retval; - - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - - if ((retval = xfs_dir_ino_validate(trans->t_mountp, inum))) - return retval; - - /* - * Fill in the arg structure for this request. - */ - args.name = name; - args.namelen = namelen; - args.hashval = xfs_da_hashname(name, namelen); - args.inumber = inum; - args.dp = dp; - args.firstblock = firstblock; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = trans; - args.justcheck = args.addname = args.oknoent = 0; - - /* - * Decide on what work routines to call based on the inode size. - */ - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - retval = xfs_dir_shortform_replace(&args); - } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) { - retval = xfs_dir_leaf_replace(&args); - } else { - retval = xfs_dir_node_replace(&args); - } - - return(retval); -} - -static int -xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp) -{ - xfs_ino_t ino; - int namelen_sum; - int count; - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - int i; - - - - if ((INT_GET(dp->di_core.di_mode, ARCH_CONVERT) & S_IFMT) != S_IFDIR) { - return 0; - } - if (INT_GET(dp->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL) { - return 0; - } - if (INT_GET(dp->di_core.di_size, ARCH_CONVERT) < sizeof(sf->hdr)) { - xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p", - dp); - return 1; - } - sf = (xfs_dir_shortform_t *)(&dp->di_u.di_dirsf); - ino = XFS_GET_DIR_INO8(sf->hdr.parent); - if (xfs_dir_ino_validate(mp, ino)) - return 1; - - count = sf->hdr.count; - if ((count < 0) || ((count * 10) > XFS_LITINO(mp))) { - xfs_fs_cmn_err(CE_WARN, mp, - "Invalid shortform count: dp 0x%p", dp); - return(1); - } - - if (count == 0) { - return 0; - } - - namelen_sum = 0; - sfe = &sf->list[0]; - for (i = sf->hdr.count - 1; i >= 0; i--) { - ino = XFS_GET_DIR_INO8(sfe->inumber); - xfs_dir_ino_validate(mp, ino); - if (sfe->namelen >= XFS_LITINO(mp)) { - xfs_fs_cmn_err(CE_WARN, mp, - "Invalid shortform namelen: dp 0x%p", dp); - return 1; - } - namelen_sum += sfe->namelen; - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - } - if (namelen_sum >= XFS_LITINO(mp)) { - xfs_fs_cmn_err(CE_WARN, mp, - "Invalid shortform namelen: dp 0x%p", dp); - return 1; - } - - return 0; -} - -/*======================================================================== - * External routines when dirsize == XFS_LBSIZE(dp->i_mount). - *========================================================================*/ - -/* - * Add a name to the leaf directory structure - * This is the external routine. - */ -int -xfs_dir_leaf_addname(xfs_da_args_t *args) -{ - int index, retval; - xfs_dabuf_t *bp; - - retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, - XFS_DATA_FORK); - if (retval) - return(retval); - ASSERT(bp != NULL); - - retval = xfs_dir_leaf_lookup_int(bp, args, &index); - if (retval == ENOENT) - retval = xfs_dir_leaf_add(bp, args, index); - xfs_da_buf_done(bp); - return(retval); -} - -/* - * Remove a name from the leaf directory structure - * This is the external routine. - */ -STATIC int -xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen) -{ - xfs_dir_leafblock_t *leaf; - int index, retval; - xfs_dabuf_t *bp; - - retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, - XFS_DATA_FORK); - if (retval) - return(retval); - ASSERT(bp != NULL); - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - retval = xfs_dir_leaf_lookup_int(bp, args, &index); - if (retval == EEXIST) { - (void)xfs_dir_leaf_remove(args->trans, bp, index); - *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); - *totallen = INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); - retval = 0; - } - xfs_da_buf_done(bp); - return(retval); -} - -/* - * Look up a name in a leaf directory structure. - * This is the external routine. - */ -STATIC int -xfs_dir_leaf_lookup(xfs_da_args_t *args) -{ - int index, retval; - xfs_dabuf_t *bp; - - retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, - XFS_DATA_FORK); - if (retval) - return(retval); - ASSERT(bp != NULL); - retval = xfs_dir_leaf_lookup_int(bp, args, &index); - xfs_da_brelse(args->trans, bp); - return(retval); -} - -/* - * Copy out directory entries for getdents(), for leaf directories. - */ -STATIC int -xfs_dir_leaf_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, - int *eofp, xfs_dirent_t *dbp, xfs_dir_put_t put) -{ - xfs_dabuf_t *bp; - int retval, eob; - - retval = xfs_da_read_buf(dp->i_transp, dp, 0, -1, &bp, XFS_DATA_FORK); - if (retval) - return(retval); - ASSERT(bp != NULL); - retval = xfs_dir_leaf_getdents_int(bp, dp, 0, uio, &eob, dbp, put, -1); - xfs_da_brelse(trans, bp); - *eofp = (eob == 0); - return(retval); -} - -/* - * Look up a name in a leaf directory structure, replace the inode number. - * This is the external routine. - */ -STATIC int -xfs_dir_leaf_replace(xfs_da_args_t *args) -{ - int index, retval; - xfs_dabuf_t *bp; - xfs_ino_t inum; - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - - inum = args->inumber; - retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, - XFS_DATA_FORK); - if (retval) - return(retval); - ASSERT(bp != NULL); - retval = xfs_dir_leaf_lookup_int(bp, args, &index); - if (retval == EEXIST) { - leaf = bp->data; - entry = &leaf->entries[index]; - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - /* XXX - replace assert? */ - XFS_DIR_SF_PUT_DIRINO(&inum, &namest->inumber); - xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber))); - xfs_da_buf_done(bp); - retval = 0; - } else - xfs_da_brelse(args->trans, bp); - return(retval); -} - - -/*======================================================================== - * External routines when dirsize > XFS_LBSIZE(mp). - *========================================================================*/ - -/* - * Add a name to a Btree-format directory. - * - * This will involve walking down the Btree, and may involve splitting - * leaf nodes and even splitting intermediate nodes up to and including - * the root node (a special case of an intermediate node). - */ -STATIC int -xfs_dir_node_addname(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - int retval, error; - - /* - * Fill in bucket of arguments/results/context to carry around. - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_dir_node_ents; - - /* - * Search to see if name already exists, and get back a pointer - * to where it should go. - */ - error = xfs_da_node_lookup_int(state, &retval); - if (error) - retval = error; - if (retval != ENOENT) - goto error; - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC); - retval = xfs_dir_leaf_add(blk->bp, args, blk->index); - if (retval == 0) { - /* - * Addition succeeded, update Btree hashvals. - */ - if (!args->justcheck) - xfs_da_fixhashpath(state, &state->path); - } else { - /* - * Addition failed, split as many Btree elements as required. - */ - if (args->total == 0) { - ASSERT(retval == ENOSPC); - goto error; - } - retval = xfs_da_split(state); - } -error: - xfs_da_state_free(state); - - return(retval); -} - -/* - * Remove a name from a B-tree directory. - * - * This will involve walking down the Btree, and may involve joining - * leaf nodes and even joining intermediate nodes up to and including - * the root node (a special case of an intermediate node). - */ -STATIC int -xfs_dir_node_removename(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - int retval, error; - - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_dir_node_ents; - - /* - * Search to see if name exists, and get back a pointer to it. - */ - error = xfs_da_node_lookup_int(state, &retval); - if (error) - retval = error; - if (retval != EEXIST) { - xfs_da_state_free(state); - return(retval); - } - - /* - * Remove the name and update the hashvals in the tree. - */ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC); - retval = xfs_dir_leaf_remove(args->trans, blk->bp, blk->index); - xfs_da_fixhashpath(state, &state->path); - - /* - * Check to see if the tree needs to be collapsed. - */ - error = 0; - if (retval) { - error = xfs_da_join(state); - } - - xfs_da_state_free(state); - if (error) - return(error); - return(0); -} - -/* - * Look up a filename in a int directory. - * Use an internal routine to actually do all the work. - */ -STATIC int -xfs_dir_node_lookup(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - int retval, error, i; - - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_dir_node_ents; - - /* - * Search to see if name exists, - * and get back a pointer to it. - */ - error = xfs_da_node_lookup_int(state, &retval); - if (error) { - retval = error; - } - - /* - * If not in a transaction, we have to release all the buffers. - */ - for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); - state->path.blk[i].bp = NULL; - } - - xfs_da_state_free(state); - return(retval); -} - -STATIC int -xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, - int *eofp, xfs_dirent_t *dbp, xfs_dir_put_t put) -{ - xfs_da_intnode_t *node; - xfs_da_node_entry_t *btree; - xfs_dir_leafblock_t *leaf = NULL; - xfs_dablk_t bno, nextbno; - xfs_dahash_t cookhash; - xfs_mount_t *mp; - int error, eob, i; - xfs_dabuf_t *bp; - xfs_daddr_t nextda; - - /* - * Pick up our context. - */ - mp = dp->i_mount; - bp = NULL; - bno = XFS_DA_COOKIE_BNO(mp, uio->uio_offset); - cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset); - - xfs_dir_trace_g_du("node: start", dp, uio); - - /* - * Re-find our place, even if we're confused about what our place is. - * - * First we check the block number from the magic cookie, it is a - * cache of where we ended last time. If we find a leaf block, and - * the starting hashval in that block is less than our desired - * hashval, then we run with it. - */ - if (bno > 0) { - error = xfs_da_read_buf(trans, dp, bno, -2, &bp, XFS_DATA_FORK); - if ((error != 0) && (error != EFSCORRUPTED)) - return(error); - if (bp) - leaf = bp->data; - if (bp && be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) { - xfs_dir_trace_g_dub("node: block not a leaf", - dp, uio, bno); - xfs_da_brelse(trans, bp); - bp = NULL; - } - if (bp && INT_GET(leaf->entries[0].hashval, ARCH_CONVERT) > cookhash) { - xfs_dir_trace_g_dub("node: leaf hash too large", - dp, uio, bno); - xfs_da_brelse(trans, bp); - bp = NULL; - } - if (bp && - cookhash > INT_GET(leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT)) { - xfs_dir_trace_g_dub("node: leaf hash too small", - dp, uio, bno); - xfs_da_brelse(trans, bp); - bp = NULL; - } - } - - /* - * If we did not find a leaf block from the blockno in the cookie, - * or we there was no blockno in the cookie (eg: first time thru), - * the we start at the top of the Btree and re-find our hashval. - */ - if (bp == NULL) { - xfs_dir_trace_g_du("node: start at root" , dp, uio); - bno = 0; - for (;;) { - error = xfs_da_read_buf(trans, dp, bno, -1, &bp, - XFS_DATA_FORK); - if (error) - return(error); - if (bp == NULL) - return(XFS_ERROR(EFSCORRUPTED)); - node = bp->data; - if (be16_to_cpu(node->hdr.info.magic) != XFS_DA_NODE_MAGIC) - break; - btree = &node->btree[0]; - xfs_dir_trace_g_dun("node: node detail", dp, uio, node); - for (i = 0; i < be16_to_cpu(node->hdr.count); btree++, i++) { - if (be32_to_cpu(btree->hashval) >= cookhash) { - bno = be32_to_cpu(btree->before); - break; - } - } - if (i == be16_to_cpu(node->hdr.count)) { - xfs_da_brelse(trans, bp); - xfs_dir_trace_g_du("node: hash beyond EOF", - dp, uio); - uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, - XFS_DA_MAXHASH); - *eofp = 1; - return(0); - } - xfs_dir_trace_g_dub("node: going to block", - dp, uio, bno); - xfs_da_brelse(trans, bp); - } - } - ASSERT(cookhash != XFS_DA_MAXHASH); - - /* - * We've dropped down to the (first) leaf block that contains the - * hashval we are interested in. Continue rolling upward thru the - * leaf blocks until we fill up our buffer. - */ - for (;;) { - leaf = bp->data; - if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC)) { - xfs_dir_trace_g_dul("node: not a leaf", dp, uio, leaf); - xfs_da_brelse(trans, bp); - XFS_CORRUPTION_ERROR("xfs_dir_node_getdents(1)", - XFS_ERRLEVEL_LOW, mp, leaf); - return XFS_ERROR(EFSCORRUPTED); - } - xfs_dir_trace_g_dul("node: leaf detail", dp, uio, leaf); - if ((nextbno = be32_to_cpu(leaf->hdr.info.forw))) { - nextda = xfs_da_reada_buf(trans, dp, nextbno, - XFS_DATA_FORK); - } else - nextda = -1; - error = xfs_dir_leaf_getdents_int(bp, dp, bno, uio, &eob, dbp, - put, nextda); - xfs_da_brelse(trans, bp); - bno = nextbno; - if (eob) { - xfs_dir_trace_g_dub("node: E-O-B", dp, uio, bno); - *eofp = 0; - return(error); - } - if (bno == 0) - break; - error = xfs_da_read_buf(trans, dp, bno, nextda, &bp, - XFS_DATA_FORK); - if (error) - return(error); - if (unlikely(bp == NULL)) { - XFS_ERROR_REPORT("xfs_dir_node_getdents(2)", - XFS_ERRLEVEL_LOW, mp); - return(XFS_ERROR(EFSCORRUPTED)); - } - } - *eofp = 1; - xfs_dir_trace_g_du("node: E-O-F", dp, uio); - return(0); -} - -/* - * Look up a filename in an int directory, replace the inode number. - * Use an internal routine to actually do the lookup. - */ -STATIC int -xfs_dir_node_replace(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - xfs_ino_t inum; - int retval, error, i; - xfs_dabuf_t *bp; - - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_dir_node_ents; - inum = args->inumber; - - /* - * Search to see if name exists, - * and get back a pointer to it. - */ - error = xfs_da_node_lookup_int(state, &retval); - if (error) { - retval = error; - } - - if (retval == EEXIST) { - blk = &state->path.blk[state->path.active - 1]; - ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC); - bp = blk->bp; - leaf = bp->data; - entry = &leaf->entries[blk->index]; - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - /* XXX - replace assert ? */ - XFS_DIR_SF_PUT_DIRINO(&inum, &namest->inumber); - xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber))); - xfs_da_buf_done(bp); - blk->bp = NULL; - retval = 0; - } else { - i = state->path.active - 1; - xfs_da_brelse(args->trans, state->path.blk[i].bp); - state->path.blk[i].bp = NULL; - } - for (i = 0; i < state->path.active - 1; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); - state->path.blk[i].bp = NULL; - } - - xfs_da_state_free(state); - return(retval); -} - -#if defined(XFS_DIR_TRACE) -/* - * Add a trace buffer entry for an inode and a uio. - */ -void -xfs_dir_trace_g_du(char *where, xfs_inode_t *dp, uio_t *uio) -{ - xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DU, where, - (void *)dp, (void *)dp->i_mount, - (void *)((unsigned long)(uio->uio_offset >> 32)), - (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), - (void *)(unsigned long)uio->uio_resid, - NULL, NULL, NULL, NULL, NULL, NULL, NULL); -} - -/* - * Add a trace buffer entry for an inode and a uio. - */ -void -xfs_dir_trace_g_dub(char *where, xfs_inode_t *dp, uio_t *uio, xfs_dablk_t bno) -{ - xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUB, where, - (void *)dp, (void *)dp->i_mount, - (void *)((unsigned long)(uio->uio_offset >> 32)), - (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), - (void *)(unsigned long)uio->uio_resid, - (void *)(unsigned long)bno, - NULL, NULL, NULL, NULL, NULL, NULL); -} - -/* - * Add a trace buffer entry for an inode and a uio. - */ -void -xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio, - xfs_da_intnode_t *node) -{ - int last = be16_to_cpu(node->hdr.count) - 1; - - xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUN, where, - (void *)dp, (void *)dp->i_mount, - (void *)((unsigned long)(uio->uio_offset >> 32)), - (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), - (void *)(unsigned long)uio->uio_resid, - (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw), - (void *)(unsigned long) - be16_to_cpu(node->hdr.count), - (void *)(unsigned long) - be32_to_cpu(node->btree[0].hashval), - (void *)(unsigned long) - be32_to_cpu(node->btree[last].hashval), - NULL, NULL, NULL); -} - -/* - * Add a trace buffer entry for an inode and a uio. - */ -void -xfs_dir_trace_g_dul(char *where, xfs_inode_t *dp, uio_t *uio, - xfs_dir_leafblock_t *leaf) -{ - int last = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1; - - xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUL, where, - (void *)dp, (void *)dp->i_mount, - (void *)((unsigned long)(uio->uio_offset >> 32)), - (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), - (void *)(unsigned long)uio->uio_resid, - (void *)(unsigned long)be32_to_cpu(leaf->hdr.info.forw), - (void *)(unsigned long) - INT_GET(leaf->hdr.count, ARCH_CONVERT), - (void *)(unsigned long) - INT_GET(leaf->entries[0].hashval, ARCH_CONVERT), - (void *)(unsigned long) - INT_GET(leaf->entries[last].hashval, ARCH_CONVERT), - NULL, NULL, NULL); -} - -/* - * Add a trace buffer entry for an inode and a uio. - */ -void -xfs_dir_trace_g_due(char *where, xfs_inode_t *dp, uio_t *uio, - xfs_dir_leaf_entry_t *entry) -{ - xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUE, where, - (void *)dp, (void *)dp->i_mount, - (void *)((unsigned long)(uio->uio_offset >> 32)), - (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), - (void *)(unsigned long)uio->uio_resid, - (void *)(unsigned long) - INT_GET(entry->hashval, ARCH_CONVERT), - NULL, NULL, NULL, NULL, NULL, NULL); -} - -/* - * Add a trace buffer entry for an inode and a uio. - */ -void -xfs_dir_trace_g_duc(char *where, xfs_inode_t *dp, uio_t *uio, xfs_off_t cookie) -{ - xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUC, where, - (void *)dp, (void *)dp->i_mount, - (void *)((unsigned long)(uio->uio_offset >> 32)), - (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), - (void *)(unsigned long)uio->uio_resid, - (void *)((unsigned long)(cookie >> 32)), - (void *)((unsigned long)(cookie & 0xFFFFFFFF)), - NULL, NULL, NULL, NULL, NULL); -} - -/* - * Add a trace buffer entry for the arguments given to the routine, - * generic form. - */ -void -xfs_dir_trace_enter(int type, char *where, - void * a0, void * a1, - void * a2, void * a3, - void * a4, void * a5, - void * a6, void * a7, - void * a8, void * a9, - void * a10, void * a11) -{ - ASSERT(xfs_dir_trace_buf); - ktrace_enter(xfs_dir_trace_buf, (void *)(unsigned long)type, - (void *)where, - (void *)a0, (void *)a1, (void *)a2, - (void *)a3, (void *)a4, (void *)a5, - (void *)a6, (void *)a7, (void *)a8, - (void *)a9, (void *)a10, (void *)a11, - NULL, NULL); -} -#endif /* XFS_DIR_TRACE */ diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h deleted file mode 100644 index 8cc8afb9f6c0..000000000000 --- a/fs/xfs/xfs_dir.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR_H__ -#define __XFS_DIR_H__ - -/* - * Large directories are structured around Btrees where all the data - * elements are in the leaf nodes. Filenames are hashed into an int, - * then that int is used as the index into the Btree. Since the hashval - * of a filename may not be unique, we may have duplicate keys. The - * internal links in the Btree are logical block offsets into the file. - * - * Small directories use a different format and are packed as tightly - * as possible so as to fit into the literal area of the inode. - */ - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -struct uio; -struct xfs_bmap_free; -struct xfs_da_args; -struct xfs_dinode; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * Directory function types. - * Put in structures (xfs_dirops_t) for v1 and v2 directories. - */ -typedef void (*xfs_dir_mount_t)(struct xfs_mount *mp); -typedef int (*xfs_dir_isempty_t)(struct xfs_inode *dp); -typedef int (*xfs_dir_init_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - struct xfs_inode *pdp); -typedef int (*xfs_dir_createname_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name, - int namelen, - xfs_ino_t inum, - xfs_fsblock_t *first, - struct xfs_bmap_free *flist, - xfs_extlen_t total); -typedef int (*xfs_dir_lookup_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name, - int namelen, - xfs_ino_t *inum); -typedef int (*xfs_dir_removename_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name, - int namelen, - xfs_ino_t ino, - xfs_fsblock_t *first, - struct xfs_bmap_free *flist, - xfs_extlen_t total); -typedef int (*xfs_dir_getdents_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - struct uio *uio, - int *eofp); -typedef int (*xfs_dir_replace_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name, - int namelen, - xfs_ino_t inum, - xfs_fsblock_t *first, - struct xfs_bmap_free *flist, - xfs_extlen_t total); -typedef int (*xfs_dir_canenter_t)(struct xfs_trans *tp, - struct xfs_inode *dp, - char *name, - int namelen); -typedef int (*xfs_dir_shortform_validate_ondisk_t)(struct xfs_mount *mp, - struct xfs_dinode *dip); -typedef int (*xfs_dir_shortform_to_single_t)(struct xfs_da_args *args); - -typedef struct xfs_dirops { - xfs_dir_mount_t xd_mount; - xfs_dir_isempty_t xd_isempty; - xfs_dir_init_t xd_init; - xfs_dir_createname_t xd_createname; - xfs_dir_lookup_t xd_lookup; - xfs_dir_removename_t xd_removename; - xfs_dir_getdents_t xd_getdents; - xfs_dir_replace_t xd_replace; - xfs_dir_canenter_t xd_canenter; - xfs_dir_shortform_validate_ondisk_t xd_shortform_validate_ondisk; - xfs_dir_shortform_to_single_t xd_shortform_to_single; -} xfs_dirops_t; - -/* - * Overall external interface routines. - */ -void xfs_dir_startup(void); /* called exactly once */ - -#define XFS_DIR_MOUNT(mp) \ - ((mp)->m_dirops.xd_mount(mp)) -#define XFS_DIR_ISEMPTY(mp,dp) \ - ((mp)->m_dirops.xd_isempty(dp)) -#define XFS_DIR_INIT(mp,tp,dp,pdp) \ - ((mp)->m_dirops.xd_init(tp,dp,pdp)) -#define XFS_DIR_CREATENAME(mp,tp,dp,name,namelen,inum,first,flist,total) \ - ((mp)->m_dirops.xd_createname(tp,dp,name,namelen,inum,first,flist,\ - total)) -#define XFS_DIR_LOOKUP(mp,tp,dp,name,namelen,inum) \ - ((mp)->m_dirops.xd_lookup(tp,dp,name,namelen,inum)) -#define XFS_DIR_REMOVENAME(mp,tp,dp,name,namelen,ino,first,flist,total) \ - ((mp)->m_dirops.xd_removename(tp,dp,name,namelen,ino,first,flist,total)) -#define XFS_DIR_GETDENTS(mp,tp,dp,uio,eofp) \ - ((mp)->m_dirops.xd_getdents(tp,dp,uio,eofp)) -#define XFS_DIR_REPLACE(mp,tp,dp,name,namelen,inum,first,flist,total) \ - ((mp)->m_dirops.xd_replace(tp,dp,name,namelen,inum,first,flist,total)) -#define XFS_DIR_CANENTER(mp,tp,dp,name,namelen) \ - ((mp)->m_dirops.xd_canenter(tp,dp,name,namelen)) -#define XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip) \ - ((mp)->m_dirops.xd_shortform_validate_ondisk(mp,dip)) -#define XFS_DIR_SHORTFORM_TO_SINGLE(mp,args) \ - ((mp)->m_dirops.xd_shortform_to_single(args)) - -#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1) -#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2) -extern xfs_dirops_t xfsv1_dirops; -extern xfs_dirops_t xfsv2_dirops; - -#endif /* __XFS_DIR_H__ */ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 022c8398ab62..8edbe1adb95b 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -24,21 +24,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_dir_leaf.h" #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" @@ -46,69 +43,14 @@ #include "xfs_dir2_trace.h" #include "xfs_error.h" -/* - * Declarations for interface routines. - */ -static void xfs_dir2_mount(xfs_mount_t *mp); -static int xfs_dir2_isempty(xfs_inode_t *dp); -static int xfs_dir2_init(xfs_trans_t *tp, xfs_inode_t *dp, - xfs_inode_t *pdp); -static int xfs_dir2_createname(xfs_trans_t *tp, xfs_inode_t *dp, - char *name, int namelen, xfs_ino_t inum, - xfs_fsblock_t *first, - xfs_bmap_free_t *flist, xfs_extlen_t total); -static int xfs_dir2_lookup(xfs_trans_t *tp, xfs_inode_t *dp, char *name, - int namelen, xfs_ino_t *inum); -static int xfs_dir2_removename(xfs_trans_t *tp, xfs_inode_t *dp, - char *name, int namelen, xfs_ino_t ino, - xfs_fsblock_t *first, - xfs_bmap_free_t *flist, xfs_extlen_t total); -static int xfs_dir2_getdents(xfs_trans_t *tp, xfs_inode_t *dp, uio_t *uio, - int *eofp); -static int xfs_dir2_replace(xfs_trans_t *tp, xfs_inode_t *dp, char *name, - int namelen, xfs_ino_t inum, - xfs_fsblock_t *first, xfs_bmap_free_t *flist, - xfs_extlen_t total); -static int xfs_dir2_canenter(xfs_trans_t *tp, xfs_inode_t *dp, char *name, - int namelen); -static int xfs_dir2_shortform_validate_ondisk(xfs_mount_t *mp, - xfs_dinode_t *dip); - -/* - * Utility routine declarations. - */ static int xfs_dir2_put_dirent64_direct(xfs_dir2_put_args_t *pa); static int xfs_dir2_put_dirent64_uio(xfs_dir2_put_args_t *pa); -/* - * Directory operations vector. - */ -xfs_dirops_t xfsv2_dirops = { - .xd_mount = xfs_dir2_mount, - .xd_isempty = xfs_dir2_isempty, - .xd_init = xfs_dir2_init, - .xd_createname = xfs_dir2_createname, - .xd_lookup = xfs_dir2_lookup, - .xd_removename = xfs_dir2_removename, - .xd_getdents = xfs_dir2_getdents, - .xd_replace = xfs_dir2_replace, - .xd_canenter = xfs_dir2_canenter, - .xd_shortform_validate_ondisk = xfs_dir2_shortform_validate_ondisk, - .xd_shortform_to_single = xfs_dir2_sf_to_block, -}; - -/* - * Interface routines. - */ - -/* - * Initialize directory-related fields in the mount structure. - */ -static void -xfs_dir2_mount( - xfs_mount_t *mp) /* filesystem mount point */ +void +xfs_dir_mount( + xfs_mount_t *mp) { - mp->m_dirversion = 2; + ASSERT(XFS_SB_VERSION_HASDIRV2(&mp->m_sb)); ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= XFS_MAX_BLOCKSIZE); mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); @@ -128,19 +70,15 @@ xfs_dir2_mount( /* * Return 1 if directory contains only "." and "..". */ -static int /* return code */ -xfs_dir2_isempty( - xfs_inode_t *dp) /* incore inode structure */ +int +xfs_dir_isempty( + xfs_inode_t *dp) { - xfs_dir2_sf_t *sfp; /* shortform directory structure */ + xfs_dir2_sf_t *sfp; ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - /* - * Might happen during shutdown. - */ - if (dp->i_d.di_size == 0) { + if (dp->i_d.di_size == 0) /* might happen during shutdown. */ return 1; - } if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) return 0; sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; @@ -148,53 +86,83 @@ xfs_dir2_isempty( } /* + * Validate a given inode number. + */ +int +xfs_dir_ino_validate( + xfs_mount_t *mp, + xfs_ino_t ino) +{ + xfs_agblock_t agblkno; + xfs_agino_t agino; + xfs_agnumber_t agno; + int ino_ok; + int ioff; + + agno = XFS_INO_TO_AGNO(mp, ino); + agblkno = XFS_INO_TO_AGBNO(mp, ino); + ioff = XFS_INO_TO_OFFSET(mp, ino); + agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); + ino_ok = + agno < mp->m_sb.sb_agcount && + agblkno < mp->m_sb.sb_agblocks && + agblkno != 0 && + ioff < (1 << mp->m_sb.sb_inopblog) && + XFS_AGINO_TO_INO(mp, agno, agino) == ino; + if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, + XFS_RANDOM_DIR_INO_VALIDATE))) { + xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", + (unsigned long long) ino); + XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} + +/* * Initialize a directory with its "." and ".." entries. */ -static int /* error */ -xfs_dir2_init( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ - xfs_inode_t *pdp) /* incore parent directory inode */ +int +xfs_dir_init( + xfs_trans_t *tp, + xfs_inode_t *dp, + xfs_inode_t *pdp) { - xfs_da_args_t args; /* operation arguments */ - int error; /* error return value */ + xfs_da_args_t args; + int error; memset((char *)&args, 0, sizeof(args)); args.dp = dp; args.trans = tp; ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) { + if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) return error; - } return xfs_dir2_sf_create(&args, pdp->i_ino); } /* Enter a name in a directory. */ -static int /* error */ -xfs_dir2_createname( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ - char *name, /* new entry name */ - int namelen, /* new entry name length */ +int +xfs_dir_createname( + xfs_trans_t *tp, + xfs_inode_t *dp, + char *name, + int namelen, xfs_ino_t inum, /* new entry inode number */ xfs_fsblock_t *first, /* bmap's firstblock */ xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; /* operation arguments */ - int rval; /* return value */ + xfs_da_args_t args; + int rval; int v; /* type-checking value */ ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) { + if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) return rval; - } XFS_STATS_INC(xs_dir_create); - /* - * Fill in the arg structure for this request. - */ + args.name = name; args.namelen = namelen; args.hashval = xfs_da_hashname(name, namelen); @@ -207,18 +175,16 @@ xfs_dir2_createname( args.trans = tp; args.justcheck = 0; args.addname = args.oknoent = 1; - /* - * Decide on what work routines to call based on the inode size. - */ + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_addname(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { + else if ((rval = xfs_dir2_isblock(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_block_addname(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { + else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_leaf_addname(&args); else rval = xfs_dir2_node_addname(&args); @@ -228,24 +194,21 @@ xfs_dir2_createname( /* * Lookup a name in a directory, give back the inode number. */ -static int /* error */ -xfs_dir2_lookup( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ - char *name, /* lookup name */ - int namelen, /* lookup name length */ +int +xfs_dir_lookup( + xfs_trans_t *tp, + xfs_inode_t *dp, + char *name, + int namelen, xfs_ino_t *inum) /* out: inode number */ { - xfs_da_args_t args; /* operation arguments */ - int rval; /* return value */ + xfs_da_args_t args; + int rval; int v; /* type-checking value */ ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); XFS_STATS_INC(xs_dir_lookup); - /* - * Fill in the arg structure for this request. - */ args.name = name; args.namelen = namelen; args.hashval = xfs_da_hashname(name, namelen); @@ -258,18 +221,16 @@ xfs_dir2_lookup( args.trans = tp; args.justcheck = args.addname = 0; args.oknoent = 1; - /* - * Decide on what work routines to call based on the inode size. - */ + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_lookup(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { + else if ((rval = xfs_dir2_isblock(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_block_lookup(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { + else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_leaf_lookup(&args); else rval = xfs_dir2_node_lookup(&args); @@ -283,26 +244,24 @@ xfs_dir2_lookup( /* * Remove an entry from a directory. */ -static int /* error */ -xfs_dir2_removename( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ - char *name, /* name of entry to remove */ - int namelen, /* name length of entry to remove */ - xfs_ino_t ino, /* inode number of entry to remove */ +int +xfs_dir_removename( + xfs_trans_t *tp, + xfs_inode_t *dp, + char *name, + int namelen, + xfs_ino_t ino, xfs_fsblock_t *first, /* bmap's firstblock */ xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; /* operation arguments */ - int rval; /* return value */ + xfs_da_args_t args; + int rval; int v; /* type-checking value */ ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); XFS_STATS_INC(xs_dir_remove); - /* - * Fill in the arg structure for this request. - */ + args.name = name; args.namelen = namelen; args.hashval = xfs_da_hashname(name, namelen); @@ -314,18 +273,16 @@ xfs_dir2_removename( args.whichfork = XFS_DATA_FORK; args.trans = tp; args.justcheck = args.addname = args.oknoent = 0; - /* - * Decide on what work routines to call based on the inode size. - */ + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_removename(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { + else if ((rval = xfs_dir2_isblock(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_block_removename(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { + else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_leaf_removename(&args); else rval = xfs_dir2_node_removename(&args); @@ -335,10 +292,10 @@ xfs_dir2_removename( /* * Read a directory. */ -static int /* error */ -xfs_dir2_getdents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ +int +xfs_dir_getdents( + xfs_trans_t *tp, + xfs_inode_t *dp, uio_t *uio, /* caller's buffer control */ int *eofp) /* out: eof reached */ { @@ -367,14 +324,11 @@ xfs_dir2_getdents( } *eofp = 0; - /* - * Decide on what work routines to call based on the inode size. - */ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_getdents(dp, uio, eofp, dbp, put); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { + else if ((rval = xfs_dir2_isblock(tp, dp, &v))) ; - } else if (v) + else if (v) rval = xfs_dir2_block_getdents(tp, dp, uio, eofp, dbp, put); else rval = xfs_dir2_leaf_getdents(tp, dp, uio, eofp, dbp, put); @@ -386,29 +340,26 @@ xfs_dir2_getdents( /* * Replace the inode number of a directory entry. */ -static int /* error */ -xfs_dir2_replace( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ +int +xfs_dir_replace( + xfs_trans_t *tp, + xfs_inode_t *dp, char *name, /* name of entry to replace */ - int namelen, /* name length of entry to replace */ + int namelen, xfs_ino_t inum, /* new inode number */ xfs_fsblock_t *first, /* bmap's firstblock */ xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; /* operation arguments */ - int rval; /* return value */ + xfs_da_args_t args; + int rval; int v; /* type-checking value */ ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) { + if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) return rval; - } - /* - * Fill in the arg structure for this request. - */ + args.name = name; args.namelen = namelen; args.hashval = xfs_da_hashname(name, namelen); @@ -420,18 +371,16 @@ xfs_dir2_replace( args.whichfork = XFS_DATA_FORK; args.trans = tp; args.justcheck = args.addname = args.oknoent = 0; - /* - * Decide on what work routines to call based on the inode size. - */ + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_replace(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { + else if ((rval = xfs_dir2_isblock(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_block_replace(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { + else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_leaf_replace(&args); else rval = xfs_dir2_node_replace(&args); @@ -441,21 +390,19 @@ xfs_dir2_replace( /* * See if this entry can be added to the directory without allocating space. */ -static int /* error */ -xfs_dir2_canenter( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ +int +xfs_dir_canenter( + xfs_trans_t *tp, + xfs_inode_t *dp, char *name, /* name of entry to add */ - int namelen) /* name length of entry to add */ + int namelen) { - xfs_da_args_t args; /* operation arguments */ - int rval; /* return value */ + xfs_da_args_t args; + int rval; int v; /* type-checking value */ ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - /* - * Fill in the arg structure for this request. - */ + args.name = name; args.namelen = namelen; args.hashval = xfs_da_hashname(name, namelen); @@ -467,18 +414,16 @@ xfs_dir2_canenter( args.whichfork = XFS_DATA_FORK; args.trans = tp; args.justcheck = args.addname = args.oknoent = 1; - /* - * Decide on what work routines to call based on the inode size. - */ + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_addname(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { + else if ((rval = xfs_dir2_isblock(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_block_addname(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { + else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) return rval; - } else if (v) + else if (v) rval = xfs_dir2_leaf_addname(&args); else rval = xfs_dir2_node_addname(&args); @@ -486,19 +431,6 @@ xfs_dir2_canenter( } /* - * Dummy routine for shortform inode validation. - * Can't really do this. - */ -/* ARGSUSED */ -static int /* error */ -xfs_dir2_shortform_validate_ondisk( - xfs_mount_t *mp, /* filesystem mount point */ - xfs_dinode_t *dip) /* ondisk inode */ -{ - return 0; -} - -/* * Utility routines. */ @@ -507,24 +439,24 @@ xfs_dir2_shortform_validate_ondisk( * This routine is for data and free blocks, not leaf/node blocks * which are handled by xfs_da_grow_inode. */ -int /* error */ +int xfs_dir2_grow_inode( - xfs_da_args_t *args, /* operation arguments */ + xfs_da_args_t *args, int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ xfs_dir2_db_t *dbp) /* out: block number added */ { xfs_fileoff_t bno; /* directory offset of new block */ int count; /* count of filesystem blocks */ xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ + int error; int got; /* blocks actually mapped */ - int i; /* temp mapping index */ + int i; xfs_bmbt_irec_t map; /* single structure for bmap */ int mapi; /* mapping index */ xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ - xfs_mount_t *mp; /* filesystem mount point */ + xfs_mount_t *mp; int nmap; /* number of bmap entries */ - xfs_trans_t *tp; /* transaction pointer */ + xfs_trans_t *tp; xfs_dir2_trace_args_s("grow_inode", args, space); dp = args->dp; @@ -538,9 +470,8 @@ xfs_dir2_grow_inode( /* * Find the first hole for our block. */ - if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) { + if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) return error; - } nmap = 1; ASSERT(args->firstblock != NULL); /* @@ -549,13 +480,9 @@ xfs_dir2_grow_inode( if ((error = xfs_bmapi(tp, dp, bno, count, XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist))) { + args->flist, NULL))) return error; - } ASSERT(nmap <= 1); - /* - * Got it in 1. - */ if (nmap == 1) { mapp = ↦ mapi = 1; @@ -585,7 +512,8 @@ xfs_dir2_grow_inode( if ((error = xfs_bmapi(tp, dp, b, c, XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist))) { + &mapp[mapi], &nmap, args->flist, + NULL))) { kmem_free(mapp, sizeof(*mapp) * count); return error; } @@ -645,20 +573,19 @@ xfs_dir2_grow_inode( /* * See if the directory is a single-block form directory. */ -int /* error */ +int xfs_dir2_isblock( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ + xfs_trans_t *tp, + xfs_inode_t *dp, int *vp) /* out: 1 is block, 0 is not block */ { xfs_fileoff_t last; /* last file offset */ - xfs_mount_t *mp; /* filesystem mount point */ - int rval; /* return value */ + xfs_mount_t *mp; + int rval; mp = dp->i_mount; - if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) { + if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) return rval; - } rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize; ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize); *vp = rval; @@ -668,20 +595,19 @@ xfs_dir2_isblock( /* * See if the directory is a single-leaf form directory. */ -int /* error */ +int xfs_dir2_isleaf( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *dp, /* incore directory inode */ + xfs_trans_t *tp, + xfs_inode_t *dp, int *vp) /* out: 1 is leaf, 0 is not leaf */ { xfs_fileoff_t last; /* last file offset */ - xfs_mount_t *mp; /* filesystem mount point */ - int rval; /* return value */ + xfs_mount_t *mp; + int rval; mp = dp->i_mount; - if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) { + if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) return rval; - } *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog); return 0; } @@ -689,9 +615,9 @@ xfs_dir2_isleaf( /* * Getdents put routine for 64-bit ABI, direct form. */ -static int /* error */ +static int xfs_dir2_put_dirent64_direct( - xfs_dir2_put_args_t *pa) /* argument bundle */ + xfs_dir2_put_args_t *pa) { xfs_dirent_t *idbp; /* dirent pointer */ iovec_t *iovp; /* io vector */ @@ -726,9 +652,9 @@ xfs_dir2_put_dirent64_direct( /* * Getdents put routine for 64-bit ABI, uio form. */ -static int /* error */ +static int xfs_dir2_put_dirent64_uio( - xfs_dir2_put_args_t *pa) /* argument bundle */ + xfs_dir2_put_args_t *pa) { xfs_dirent_t *idbp; /* dirent pointer */ int namelen; /* entry name length */ @@ -764,17 +690,17 @@ xfs_dir2_put_dirent64_uio( */ int xfs_dir2_shrink_inode( - xfs_da_args_t *args, /* operation arguments */ - xfs_dir2_db_t db, /* directory block number */ - xfs_dabuf_t *bp) /* block's buffer */ + xfs_da_args_t *args, + xfs_dir2_db_t db, + xfs_dabuf_t *bp) { xfs_fileoff_t bno; /* directory file offset */ xfs_dablk_t da; /* directory file offset */ int done; /* bunmap is finished */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ + xfs_inode_t *dp; + int error; + xfs_mount_t *mp; + xfs_trans_t *tp; xfs_dir2_trace_args_db("shrink_inode", args, db, bp); dp = args->dp; @@ -786,7 +712,7 @@ xfs_dir2_shrink_inode( */ if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, - &done))) { + NULL, &done))) { /* * ENOSPC actually can happen if we're in a removename with * no space reservation, and the resulting block removal diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 7dd364b1e038..86560b6f794c 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -22,7 +22,9 @@ struct uio; struct xfs_dabuf; struct xfs_da_args; struct xfs_dir2_put_args; +struct xfs_bmap_free; struct xfs_inode; +struct xfs_mount; struct xfs_trans; /* @@ -73,7 +75,35 @@ typedef struct xfs_dir2_put_args { } xfs_dir2_put_args_t; /* - * Other interfaces used by the rest of the dir v2 code. + * Generic directory interface routines + */ +extern void xfs_dir_startup(void); +extern void xfs_dir_mount(struct xfs_mount *mp); +extern int xfs_dir_isempty(struct xfs_inode *dp); +extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_inode *pdp); +extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, + char *name, int namelen, xfs_ino_t inum, + xfs_fsblock_t *first, + struct xfs_bmap_free *flist, xfs_extlen_t tot); +extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, + char *name, int namelen, xfs_ino_t *inum); +extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, + char *name, int namelen, xfs_ino_t ino, + xfs_fsblock_t *first, + struct xfs_bmap_free *flist, xfs_extlen_t tot); +extern int xfs_dir_getdents(struct xfs_trans *tp, struct xfs_inode *dp, + uio_t *uio, int *eofp); +extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, + char *name, int namelen, xfs_ino_t inum, + xfs_fsblock_t *first, + struct xfs_bmap_free *flist, xfs_extlen_t tot); +extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, + char *name, int namelen); +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); + +/* + * Utility routines for v2 directories. */ extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, xfs_dir2_db_t *dbp); diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 972ded595476..9d7438bba30d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -22,19 +22,16 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir_leaf.h" #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" @@ -51,6 +48,18 @@ static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp, int *entno); static int xfs_dir2_block_sort(const void *a, const void *b); +static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; + +/* + * One-time startup routine called from xfs_init(). + */ +void +xfs_dir_startup(void) +{ + xfs_dir_hash_dot = xfs_da_hashname(".", 1); + xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); +} + /* * Add an entry to a block directory. */ @@ -400,7 +409,7 @@ xfs_dir2_block_addname( /* * Create the new data entry. */ - INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); @@ -508,7 +517,7 @@ xfs_dir2_block_getdents( p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, ptr - (char *)block); - p.ino = INT_GET(dep->inumber, ARCH_CONVERT); + p.ino = be64_to_cpu(dep->inumber); #if XFS_BIG_INUMS p.ino += mp->m_inoadd; #endif @@ -626,7 +635,7 @@ xfs_dir2_block_lookup( /* * Fill in inode number, release the block. */ - args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); + args->inumber = be64_to_cpu(dep->inumber); xfs_da_brelse(args->trans, bp); return XFS_ERROR(EEXIST); } @@ -844,11 +853,11 @@ xfs_dir2_block_replace( */ dep = (xfs_dir2_data_entry_t *) ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); - ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber); + ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. */ - INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->inumber = cpu_to_be64(args->inumber); xfs_dir2_data_log_entry(args->trans, bp, dep); xfs_dir2_data_check(dp, bp); xfs_da_buf_done(bp); @@ -1130,7 +1139,7 @@ xfs_dir2_sf_to_block( */ dep = (xfs_dir2_data_entry_t *) ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); - INT_SET(dep->inumber, ARCH_CONVERT, dp->i_ino); + dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); @@ -1144,7 +1153,7 @@ xfs_dir2_sf_to_block( */ dep = (xfs_dir2_data_entry_t *) ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); - INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); + dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); @@ -1193,7 +1202,7 @@ xfs_dir2_sf_to_block( * Copy a real entry. */ dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); - INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER(sfp, + dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep))); dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index bb3d03ff002b..f7c799217072 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -22,18 +22,15 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_dir_leaf.h" #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" @@ -133,7 +130,7 @@ xfs_dir2_data_check( */ dep = (xfs_dir2_data_entry_t *)p; ASSERT(dep->namelen != 0); - ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0); + ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) == (char *)dep - (char *)d); count++; diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index 0847cbb53e17..a6ae2d21c40a 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -85,11 +85,11 @@ typedef struct xfs_dir2_data_hdr { * Tag appears as the last 2 bytes. */ typedef struct xfs_dir2_data_entry { - xfs_ino_t inumber; /* inode number */ - __uint8_t namelen; /* name length */ - __uint8_t name[1]; /* name bytes, no null */ + __be64 inumber; /* inode number */ + __u8 namelen; /* name length */ + __u8 name[1]; /* name bytes, no null */ /* variable offset */ - xfs_dir2_data_off_t tag; /* starting offset of us */ + __be16 tag; /* starting offset of us */ } xfs_dir2_data_entry_t; /* diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 0f5e2f2ce6ec..b1cf1fbf423d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -407,7 +405,7 @@ xfs_dir2_leaf_addname( * Initialize our new entry (at last). */ dep = (xfs_dir2_data_entry_t *)dup; - INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); @@ -884,7 +882,7 @@ xfs_dir2_leaf_getdents( XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET) - map_off, XFS_BMAPI_METADATA, NULL, 0, - &map[map_valid], &nmap, NULL); + &map[map_valid], &nmap, NULL, NULL); /* * Don't know if we should ignore this or * try to return an error. @@ -1098,7 +1096,7 @@ xfs_dir2_leaf_getdents( p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); - p->ino = INT_GET(dep->inumber, ARCH_CONVERT); + p->ino = be64_to_cpu(dep->inumber); #if XFS_BIG_INUMS p->ino += mp->m_inoadd; #endif @@ -1319,7 +1317,7 @@ xfs_dir2_leaf_lookup( /* * Return the found inode number. */ - args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); + args->inumber = be64_to_cpu(dep->inumber); xfs_da_brelse(tp, dbp); xfs_da_brelse(tp, lbp); return XFS_ERROR(EEXIST); @@ -1606,11 +1604,11 @@ xfs_dir2_leaf_replace( dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); - ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT)); + ASSERT(args->inumber != be64_to_cpu(dep->inumber)); /* * Put the new inode number in, log it. */ - INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->inumber = cpu_to_be64(args->inumber); tp = args->trans; xfs_dir2_data_log_entry(tp, dbp, dep); xfs_da_buf_done(dbp); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index ac511ab9c52d..9ca71719b683 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -22,13 +22,11 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -505,7 +503,6 @@ xfs_dir2_leafn_lookup_int( XFS_DATA_FORK))) { return error; } - curfdb = newfdb; free = curbp->data; ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); @@ -527,8 +524,11 @@ xfs_dir2_leafn_lookup_int( if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); + if (curfdb != newfdb) + xfs_da_brelse(tp, curbp); return XFS_ERROR(EFSCORRUPTED); } + curfdb = newfdb; if (be16_to_cpu(free->bests[fi]) >= length) { *indexp = index; state->extravalid = 1; @@ -580,7 +580,7 @@ xfs_dir2_leafn_lookup_int( if (dep->namelen == args->namelen && dep->name[0] == args->name[0] && memcmp(dep->name, args->name, args->namelen) == 0) { - args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); + args->inumber = be64_to_cpu(dep->inumber); *indexp = index; state->extravalid = 1; state->extrablk.bp = curbp; @@ -970,7 +970,7 @@ xfs_dir2_leafn_remove( /* * One less used entry in the free table. */ - free->hdr.nused = cpu_to_be32(-1); + be32_add(&free->hdr.nused, -1); xfs_dir2_free_log_header(tp, fbp); /* * If this was the last entry in the table, we can @@ -1695,7 +1695,7 @@ xfs_dir2_node_addname_int( * Fill in the new entry and log it. */ dep = (xfs_dir2_data_entry_t *)dup; - INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); @@ -1905,11 +1905,11 @@ xfs_dir2_node_replace( dep = (xfs_dir2_data_entry_t *) ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address))); - ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT)); + ASSERT(inum != be64_to_cpu(dep->inumber)); /* * Fill in the new inode number and log the entry. */ - INT_SET(dep->inumber, ARCH_CONVERT, inum); + dep->inumber = cpu_to_be64(inum); xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep); rval = 0; } diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index d98a41d1fe63..0cd77b17bf92 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -22,19 +22,16 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir_leaf.h" #include "xfs_error.h" #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" @@ -117,13 +114,13 @@ xfs_dir2_block_sfsize( dep->name[0] == '.' && dep->name[1] == '.'; #if XFS_BIG_INUMS if (!isdot) - i8count += INT_GET(dep->inumber, ARCH_CONVERT) > XFS_DIR2_MAX_SHORT_INUM; + i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; #endif if (!isdot && !isdotdot) { count++; namelen += dep->namelen; } else if (isdotdot) - parent = INT_GET(dep->inumber, ARCH_CONVERT); + parent = be64_to_cpu(dep->inumber); /* * Calculate the new size, see if we should give up yet. */ @@ -229,13 +226,13 @@ xfs_dir2_block_to_sf( * Skip . */ if (dep->namelen == 1 && dep->name[0] == '.') - ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == dp->i_ino); + ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino); /* * Skip .., but make sure the inode number is right. */ else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') - ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == + ASSERT(be64_to_cpu(dep->inumber) == XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); /* * Normal entry, copy it into shortform. @@ -246,7 +243,7 @@ xfs_dir2_block_to_sf( (xfs_dir2_data_aoff_t) ((char *)dep - (char *)block)); memcpy(sfep->name, dep->name, dep->namelen); - temp=INT_GET(dep->inumber, ARCH_CONVERT); + temp = be64_to_cpu(dep->inumber); XFS_DIR2_SF_PUT_INUMBER(sfp, &temp, XFS_DIR2_SF_INUMBERP(sfep)); sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c index c626943b4112..f3fb2ffd6f5c 100644 --- a/fs/xfs/xfs_dir2_trace.c +++ b/fs/xfs/xfs_dir2_trace.c @@ -19,11 +19,9 @@ #include "xfs_fs.h" #include "xfs_types.h" #include "xfs_inum.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c deleted file mode 100644 index 6d711869262f..000000000000 --- a/fs/xfs/xfs_dir_leaf.c +++ /dev/null @@ -1,2213 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_dir.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_bmap.h" -#include "xfs_dir_leaf.h" -#include "xfs_error.h" - -/* - * xfs_dir_leaf.c - * - * Routines to implement leaf blocks of directories as Btrees of hashed names. - */ - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -/* - * Routines used for growing the Btree. - */ -STATIC void xfs_dir_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args, - int insertion_index, - int freemap_index); -STATIC int xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer, - int musthave, int justcheck); -STATIC void xfs_dir_leaf_rebalance(xfs_da_state_t *state, - xfs_da_state_blk_t *blk1, - xfs_da_state_blk_t *blk2); -STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state, - xfs_da_state_blk_t *leaf_blk_1, - xfs_da_state_blk_t *leaf_blk_2, - int *number_entries_in_blk1, - int *number_namebytes_in_blk1); - -STATIC int xfs_dir_leaf_create(struct xfs_da_args *args, - xfs_dablk_t which_block, - struct xfs_dabuf **bpp); - -/* - * Utility routines. - */ -STATIC void xfs_dir_leaf_moveents(xfs_dir_leafblock_t *src_leaf, - int src_start, - xfs_dir_leafblock_t *dst_leaf, - int dst_start, int move_count, - xfs_mount_t *mp); - - -/*======================================================================== - * External routines when dirsize < XFS_IFORK_DSIZE(dp). - *========================================================================*/ - - -/* - * Validate a given inode number. - */ -int -xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino) -{ - xfs_agblock_t agblkno; - xfs_agino_t agino; - xfs_agnumber_t agno; - int ino_ok; - int ioff; - - agno = XFS_INO_TO_AGNO(mp, ino); - agblkno = XFS_INO_TO_AGBNO(mp, ino); - ioff = XFS_INO_TO_OFFSET(mp, ino); - agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); - ino_ok = - agno < mp->m_sb.sb_agcount && - agblkno < mp->m_sb.sb_agblocks && - agblkno != 0 && - ioff < (1 << mp->m_sb.sb_inopblog) && - XFS_AGINO_TO_INO(mp, agno, agino) == ino; - if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, - XFS_RANDOM_DIR_INO_VALIDATE))) { - xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", - (unsigned long long) ino); - XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } - return 0; -} - -/* - * Create the initial contents of a shortform directory. - */ -int -xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent) -{ - xfs_dir_sf_hdr_t *hdr; - xfs_inode_t *dp; - - dp = args->dp; - ASSERT(dp != NULL); - ASSERT(dp->i_d.di_size == 0); - if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) { - dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */ - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); - dp->i_df.if_flags |= XFS_IFINLINE; - } - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - ASSERT(dp->i_df.if_bytes == 0); - xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK); - hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data; - XFS_DIR_SF_PUT_DIRINO(&parent, &hdr->parent); - - hdr->count = 0; - dp->i_d.di_size = sizeof(*hdr); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); - return 0; -} - -/* - * Add a name to the shortform directory structure. - * Overflow from the inode has already been checked for. - */ -int -xfs_dir_shortform_addname(xfs_da_args_t *args) -{ - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - int i, offset, size; - xfs_inode_t *dp; - - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Catch the case where the conversion from shortform to leaf - * failed part way through. - */ - if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; - sfe = &sf->list[0]; - for (i = sf->hdr.count-1; i >= 0; i--) { - if (sfe->namelen == args->namelen && - args->name[0] == sfe->name[0] && - memcmp(args->name, sfe->name, args->namelen) == 0) - return XFS_ERROR(EEXIST); - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - } - - offset = (int)((char *)sfe - (char *)sf); - size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen); - xfs_idata_realloc(dp, size, XFS_DATA_FORK); - sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; - sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset); - - XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber); - sfe->namelen = args->namelen; - memcpy(sfe->name, args->name, sfe->namelen); - sf->hdr.count++; - - dp->i_d.di_size += size; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); - - return 0; -} - -/* - * Remove a name from the shortform directory structure. - */ -int -xfs_dir_shortform_removename(xfs_da_args_t *args) -{ - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - int base, size = 0, i; - xfs_inode_t *dp; - - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Catch the case where the conversion from shortform to leaf - * failed part way through. - */ - if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - base = sizeof(xfs_dir_sf_hdr_t); - sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; - sfe = &sf->list[0]; - for (i = sf->hdr.count-1; i >= 0; i--) { - size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe); - if (sfe->namelen == args->namelen && - sfe->name[0] == args->name[0] && - memcmp(sfe->name, args->name, args->namelen) == 0) - break; - base += size; - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - } - if (i < 0) { - ASSERT(args->oknoent); - return XFS_ERROR(ENOENT); - } - - if ((base + size) != dp->i_d.di_size) { - memmove(&((char *)sf)[base], &((char *)sf)[base+size], - dp->i_d.di_size - (base+size)); - } - sf->hdr.count--; - - xfs_idata_realloc(dp, -size, XFS_DATA_FORK); - dp->i_d.di_size -= size; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); - - return 0; -} - -/* - * Look up a name in a shortform directory structure. - */ -int -xfs_dir_shortform_lookup(xfs_da_args_t *args) -{ - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - int i; - xfs_inode_t *dp; - - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Catch the case where the conversion from shortform to leaf - * failed part way through. - */ - if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; - if (args->namelen == 2 && - args->name[0] == '.' && args->name[1] == '.') { - XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &args->inumber); - return(XFS_ERROR(EEXIST)); - } - if (args->namelen == 1 && args->name[0] == '.') { - args->inumber = dp->i_ino; - return(XFS_ERROR(EEXIST)); - } - sfe = &sf->list[0]; - for (i = sf->hdr.count-1; i >= 0; i--) { - if (sfe->namelen == args->namelen && - sfe->name[0] == args->name[0] && - memcmp(args->name, sfe->name, args->namelen) == 0) { - XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args->inumber); - return(XFS_ERROR(EEXIST)); - } - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - } - ASSERT(args->oknoent); - return(XFS_ERROR(ENOENT)); -} - -/* - * Convert from using the shortform to the leaf. - */ -int -xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs) -{ - xfs_inode_t *dp; - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - xfs_da_args_t args; - xfs_ino_t inumber; - char *tmpbuffer; - int retval, i, size; - xfs_dablk_t blkno; - xfs_dabuf_t *bp; - - dp = iargs->dp; - /* - * Catch the case where the conversion from shortform to leaf - * failed part way through. - */ - if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - size = dp->i_df.if_bytes; - tmpbuffer = kmem_alloc(size, KM_SLEEP); - ASSERT(tmpbuffer != NULL); - - memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size); - - sf = (xfs_dir_shortform_t *)tmpbuffer; - XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &inumber); - - xfs_idata_realloc(dp, -size, XFS_DATA_FORK); - dp->i_d.di_size = 0; - xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE); - retval = xfs_da_grow_inode(iargs, &blkno); - if (retval) - goto out; - - ASSERT(blkno == 0); - retval = xfs_dir_leaf_create(iargs, blkno, &bp); - if (retval) - goto out; - xfs_da_buf_done(bp); - - args.name = "."; - args.namelen = 1; - args.hashval = xfs_dir_hash_dot; - args.inumber = dp->i_ino; - args.dp = dp; - args.firstblock = iargs->firstblock; - args.flist = iargs->flist; - args.total = iargs->total; - args.whichfork = XFS_DATA_FORK; - args.trans = iargs->trans; - args.justcheck = 0; - args.addname = args.oknoent = 1; - retval = xfs_dir_leaf_addname(&args); - if (retval) - goto out; - - args.name = ".."; - args.namelen = 2; - args.hashval = xfs_dir_hash_dotdot; - args.inumber = inumber; - retval = xfs_dir_leaf_addname(&args); - if (retval) - goto out; - - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; i++) { - args.name = (char *)(sfe->name); - args.namelen = sfe->namelen; - args.hashval = xfs_da_hashname((char *)(sfe->name), - sfe->namelen); - XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args.inumber); - retval = xfs_dir_leaf_addname(&args); - if (retval) - goto out; - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - } - retval = 0; - -out: - kmem_free(tmpbuffer, size); - return retval; -} - -STATIC int -xfs_dir_shortform_compare(const void *a, const void *b) -{ - xfs_dir_sf_sort_t *sa, *sb; - - sa = (xfs_dir_sf_sort_t *)a; - sb = (xfs_dir_sf_sort_t *)b; - if (sa->hash < sb->hash) - return -1; - else if (sa->hash > sb->hash) - return 1; - else - return sa->entno - sb->entno; -} - -/* - * Copy out directory entries for getdents(), for shortform directories. - */ -/*ARGSUSED*/ -int -xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp, - xfs_dirent_t *dbp, xfs_dir_put_t put) -{ - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - int retval, i, sbsize, nsbuf, lastresid=0, want_entno; - xfs_mount_t *mp; - xfs_dahash_t cookhash, hash; - xfs_dir_put_args_t p; - xfs_dir_sf_sort_t *sbuf, *sbp; - - mp = dp->i_mount; - sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; - cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset); - want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset); - nsbuf = sf->hdr.count + 2; - sbsize = (nsbuf + 1) * sizeof(*sbuf); - sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); - - xfs_dir_trace_g_du("sf: start", dp, uio); - - /* - * Collect all the entries into the buffer. - * Entry 0 is . - */ - sbp->entno = 0; - sbp->seqno = 0; - sbp->hash = xfs_dir_hash_dot; - sbp->ino = dp->i_ino; - sbp->name = "."; - sbp->namelen = 1; - sbp++; - - /* - * Entry 1 is .. - */ - sbp->entno = 1; - sbp->seqno = 0; - sbp->hash = xfs_dir_hash_dotdot; - sbp->ino = XFS_GET_DIR_INO8(sf->hdr.parent); - sbp->name = ".."; - sbp->namelen = 2; - sbp++; - - /* - * Scan the directory data for the rest of the entries. - */ - for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { - - if (unlikely( - ((char *)sfe < (char *)sf) || - ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)))) { - xfs_dir_trace_g_du("sf: corrupted", dp, uio); - XFS_CORRUPTION_ERROR("xfs_dir_shortform_getdents", - XFS_ERRLEVEL_LOW, mp, sfe); - kmem_free(sbuf, sbsize); - return XFS_ERROR(EFSCORRUPTED); - } - - sbp->entno = i + 2; - sbp->seqno = 0; - sbp->hash = xfs_da_hashname((char *)sfe->name, sfe->namelen); - sbp->ino = XFS_GET_DIR_INO8(sfe->inumber); - sbp->name = (char *)sfe->name; - sbp->namelen = sfe->namelen; - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - sbp++; - } - - /* - * Sort the entries on hash then entno. - */ - xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare); - /* - * Stuff in last entry. - */ - sbp->entno = nsbuf; - sbp->hash = XFS_DA_MAXHASH; - sbp->seqno = 0; - /* - * Figure out the sequence numbers in case there's a hash duplicate. - */ - for (hash = sbuf->hash, sbp = sbuf + 1; - sbp < &sbuf[nsbuf + 1]; sbp++) { - if (sbp->hash == hash) - sbp->seqno = sbp[-1].seqno + 1; - else - hash = sbp->hash; - } - - /* - * Set up put routine. - */ - p.dbp = dbp; - p.put = put; - p.uio = uio; - - /* - * Find our place. - */ - for (sbp = sbuf; sbp < &sbuf[nsbuf + 1]; sbp++) { - if (sbp->hash > cookhash || - (sbp->hash == cookhash && sbp->seqno >= want_entno)) - break; - } - - /* - * Did we fail to find anything? We stop at the last entry, - * the one we put maxhash into. - */ - if (sbp == &sbuf[nsbuf]) { - kmem_free(sbuf, sbsize); - xfs_dir_trace_g_du("sf: hash beyond end", dp, uio); - uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH); - *eofp = 1; - return 0; - } - - /* - * Loop putting entries into the user buffer. - */ - while (sbp < &sbuf[nsbuf]) { - /* - * Save the first resid in a run of equal-hashval entries - * so that we can back them out if they don't all fit. - */ - if (sbp->seqno == 0 || sbp == sbuf) - lastresid = uio->uio_resid; - XFS_PUT_COOKIE(p.cook, mp, 0, sbp[1].seqno, sbp[1].hash); - p.ino = sbp->ino; -#if XFS_BIG_INUMS - p.ino += mp->m_inoadd; -#endif - p.name = sbp->name; - p.namelen = sbp->namelen; - retval = p.put(&p); - if (!p.done) { - uio->uio_offset = - XFS_DA_MAKE_COOKIE(mp, 0, 0, sbp->hash); - kmem_free(sbuf, sbsize); - uio->uio_resid = lastresid; - xfs_dir_trace_g_du("sf: E-O-B", dp, uio); - return retval; - } - sbp++; - } - kmem_free(sbuf, sbsize); - uio->uio_offset = p.cook.o; - *eofp = 1; - xfs_dir_trace_g_du("sf: E-O-F", dp, uio); - return 0; -} - -/* - * Look up a name in a shortform directory structure, replace the inode number. - */ -int -xfs_dir_shortform_replace(xfs_da_args_t *args) -{ - xfs_dir_shortform_t *sf; - xfs_dir_sf_entry_t *sfe; - xfs_inode_t *dp; - int i; - - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Catch the case where the conversion from shortform to leaf - * failed part way through. - */ - if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; - if (args->namelen == 2 && - args->name[0] == '.' && args->name[1] == '.') { - /* XXX - replace assert? */ - XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); - return 0; - } - ASSERT(args->namelen != 1 || args->name[0] != '.'); - sfe = &sf->list[0]; - for (i = sf->hdr.count-1; i >= 0; i--) { - if (sfe->namelen == args->namelen && - sfe->name[0] == args->name[0] && - memcmp(args->name, sfe->name, args->namelen) == 0) { - ASSERT(memcmp((char *)&args->inumber, - (char *)&sfe->inumber, sizeof(xfs_ino_t))); - XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); - return 0; - } - sfe = XFS_DIR_SF_NEXTENTRY(sfe); - } - ASSERT(args->oknoent); - return XFS_ERROR(ENOENT); -} - -/* - * Convert a leaf directory to shortform structure - */ -int -xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_hdr_t *hdr; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - xfs_da_args_t args; - xfs_inode_t *dp; - xfs_ino_t parent = 0; - char *tmpbuffer; - int retval, i; - xfs_dabuf_t *bp; - - dp = iargs->dp; - tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); - ASSERT(tmpbuffer != NULL); - - retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp, - XFS_DATA_FORK); - if (retval) - goto out; - ASSERT(bp != NULL); - memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); - leaf = (xfs_dir_leafblock_t *)tmpbuffer; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); - - /* - * Find and special case the parent inode number - */ - hdr = &leaf->hdr; - entry = &leaf->entries[0]; - for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) { - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - if ((entry->namelen == 2) && - (namest->name[0] == '.') && - (namest->name[1] == '.')) { - XFS_DIR_SF_GET_DIRINO(&namest->inumber, &parent); - entry->nameidx = 0; - } else if ((entry->namelen == 1) && (namest->name[0] == '.')) { - entry->nameidx = 0; - } - } - retval = xfs_da_shrink_inode(iargs, 0, bp); - if (retval) - goto out; - retval = xfs_dir_shortform_create(iargs, parent); - if (retval) - goto out; - - /* - * Copy the rest of the filenames - */ - entry = &leaf->entries[0]; - args.dp = dp; - args.firstblock = iargs->firstblock; - args.flist = iargs->flist; - args.total = iargs->total; - args.whichfork = XFS_DATA_FORK; - args.trans = iargs->trans; - args.justcheck = 0; - args.addname = args.oknoent = 1; - for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) { - if (!entry->nameidx) - continue; - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - args.name = (char *)(namest->name); - args.namelen = entry->namelen; - args.hashval = INT_GET(entry->hashval, ARCH_CONVERT); - XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args.inumber); - xfs_dir_shortform_addname(&args); - } - -out: - kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); - return retval; -} - -/* - * Convert from using a single leaf to a root node and a leaf. - */ -int -xfs_dir_leaf_to_node(xfs_da_args_t *args) -{ - xfs_dir_leafblock_t *leaf; - xfs_da_intnode_t *node; - xfs_inode_t *dp; - xfs_dabuf_t *bp1, *bp2; - xfs_dablk_t blkno; - int retval; - - dp = args->dp; - retval = xfs_da_grow_inode(args, &blkno); - ASSERT(blkno == 1); - if (retval) - return retval; - retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1, - XFS_DATA_FORK); - if (retval) - return retval; - ASSERT(bp1 != NULL); - retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2, - XFS_DATA_FORK); - if (retval) { - xfs_da_buf_done(bp1); - return retval; - } - ASSERT(bp2 != NULL); - memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount)); - xfs_da_buf_done(bp1); - xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); - - /* - * Set up the new root node. - */ - retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK); - if (retval) { - xfs_da_buf_done(bp2); - return retval; - } - node = bp1->data; - leaf = bp2->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - node->btree[0].hashval = cpu_to_be32( - INT_GET(leaf->entries[ - INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT)); - xfs_da_buf_done(bp2); - node->btree[0].before = cpu_to_be32(blkno); - node->hdr.count = cpu_to_be16(1); - xfs_da_log_buf(args->trans, bp1, - XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0]))); - xfs_da_buf_done(bp1); - - return retval; -} - - -/*======================================================================== - * Routines used for growing the Btree. - *========================================================================*/ - -/* - * Create the initial contents of a leaf directory - * or a leaf in a node directory. - */ -STATIC int -xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_hdr_t *hdr; - xfs_inode_t *dp; - xfs_dabuf_t *bp; - int retval; - - dp = args->dp; - ASSERT(dp != NULL); - retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK); - if (retval) - return retval; - ASSERT(bp != NULL); - leaf = bp->data; - memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); - hdr = &leaf->hdr; - hdr->info.magic = cpu_to_be16(XFS_DIR_LEAF_MAGIC); - INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount)); - if (!hdr->firstused) - INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1); - INT_SET(hdr->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t)); - INT_SET(hdr->freemap[0].size, ARCH_CONVERT, INT_GET(hdr->firstused, ARCH_CONVERT) - INT_GET(hdr->freemap[0].base, ARCH_CONVERT)); - - xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); - - *bpp = bp; - return 0; -} - -/* - * Split the leaf node, rebalance, then add the new entry. - */ -int -xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, - xfs_da_state_blk_t *newblk) -{ - xfs_dablk_t blkno; - xfs_da_args_t *args; - int error; - - /* - * Allocate space for a new leaf node. - */ - args = state->args; - ASSERT(args != NULL); - ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC); - error = xfs_da_grow_inode(args, &blkno); - if (error) - return error; - error = xfs_dir_leaf_create(args, blkno, &newblk->bp); - if (error) - return error; - newblk->blkno = blkno; - newblk->magic = XFS_DIR_LEAF_MAGIC; - - /* - * Rebalance the entries across the two leaves. - */ - xfs_dir_leaf_rebalance(state, oldblk, newblk); - error = xfs_da_blk_link(state, oldblk, newblk); - if (error) - return error; - - /* - * Insert the new entry in the correct block. - */ - if (state->inleaf) { - error = xfs_dir_leaf_add(oldblk->bp, args, oldblk->index); - } else { - error = xfs_dir_leaf_add(newblk->bp, args, newblk->index); - } - - /* - * Update last hashval in each block since we added the name. - */ - oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL); - newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL); - return error; -} - -/* - * Add a name to the leaf directory structure. - * - * Must take into account fragmented leaves and leaves where spacemap has - * lost some freespace information (ie: holes). - */ -int -xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_hdr_t *hdr; - xfs_dir_leaf_map_t *map; - int tablesize, entsize, sum, i, tmp, error; - - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT))); - hdr = &leaf->hdr; - entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen); - - /* - * Search through freemap for first-fit on new name length. - * (may need to figure in size of entry struct too) - */ - tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t) - + (uint)sizeof(xfs_dir_leaf_hdr_t); - map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1]; - for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) { - if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) { - sum += INT_GET(map->size, ARCH_CONVERT); - continue; - } - if (!map->size) - continue; /* no space in this map */ - tmp = entsize; - if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT)) - tmp += (uint)sizeof(xfs_dir_leaf_entry_t); - if (INT_GET(map->size, ARCH_CONVERT) >= tmp) { - if (!args->justcheck) - xfs_dir_leaf_add_work(bp, args, index, i); - return 0; - } - sum += INT_GET(map->size, ARCH_CONVERT); - } - - /* - * If there are no holes in the address space of the block, - * and we don't have enough freespace, then compaction will do us - * no good and we should just give up. - */ - if (!hdr->holes && (sum < entsize)) - return XFS_ERROR(ENOSPC); - - /* - * Compact the entries to coalesce free space. - * Pass the justcheck flag so the checking pass can return - * an error, without changing anything, if it won't fit. - */ - error = xfs_dir_leaf_compact(args->trans, bp, - args->total == 0 ? - entsize + - (uint)sizeof(xfs_dir_leaf_entry_t) : 0, - args->justcheck); - if (error) - return error; - /* - * After compaction, the block is guaranteed to have only one - * free region, in freemap[0]. If it is not big enough, give up. - */ - if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) < - (entsize + (uint)sizeof(xfs_dir_leaf_entry_t))) - return XFS_ERROR(ENOSPC); - - if (!args->justcheck) - xfs_dir_leaf_add_work(bp, args, index, 0); - return 0; -} - -/* - * Add a name to a leaf directory structure. - */ -STATIC void -xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index, - int mapindex) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_hdr_t *hdr; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - xfs_dir_leaf_map_t *map; - /* REFERENCED */ - xfs_mount_t *mp; - int tmp, i; - - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - hdr = &leaf->hdr; - ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE)); - ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT))); - - /* - * Force open some space in the entry array and fill it in. - */ - entry = &leaf->entries[index]; - if (index < INT_GET(hdr->count, ARCH_CONVERT)) { - tmp = INT_GET(hdr->count, ARCH_CONVERT) - index; - tmp *= (uint)sizeof(xfs_dir_leaf_entry_t); - memmove(entry + 1, entry, tmp); - xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry))); - } - INT_MOD(hdr->count, ARCH_CONVERT, +1); - - /* - * Allocate space for the new string (at the end of the run). - */ - map = &hdr->freemap[mapindex]; - mp = args->trans->t_mountp; - ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); - ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)); - ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); - INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen))); - INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)); - INT_SET(entry->hashval, ARCH_CONVERT, args->hashval); - entry->namelen = args->namelen; - xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); - - /* - * Copy the string and inode number into the new space. - */ - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - XFS_DIR_SF_PUT_DIRINO(&args->inumber, &namest->inumber); - memcpy(namest->name, args->name, args->namelen); - xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry))); - - /* - * Update the control info for this leaf node - */ - if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT)) - INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT); - ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr))); - tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t) - + (uint)sizeof(xfs_dir_leaf_hdr_t); - map = &hdr->freemap[0]; - for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) { - if (INT_GET(map->base, ARCH_CONVERT) == tmp) { - INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t)); - INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t))); - } - } - INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen); - xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); -} - -/* - * Garbage collect a leaf directory block by copying it to a new buffer. - */ -STATIC int -xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave, - int justcheck) -{ - xfs_dir_leafblock_t *leaf_s, *leaf_d; - xfs_dir_leaf_hdr_t *hdr_s, *hdr_d; - xfs_mount_t *mp; - char *tmpbuffer; - char *tmpbuffer2=NULL; - int rval; - int lbsize; - - mp = trans->t_mountp; - lbsize = XFS_LBSIZE(mp); - tmpbuffer = kmem_alloc(lbsize, KM_SLEEP); - ASSERT(tmpbuffer != NULL); - memcpy(tmpbuffer, bp->data, lbsize); - - /* - * Make a second copy in case xfs_dir_leaf_moveents() - * below destroys the original. - */ - if (musthave || justcheck) { - tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP); - memcpy(tmpbuffer2, bp->data, lbsize); - } - memset(bp->data, 0, lbsize); - - /* - * Copy basic information - */ - leaf_s = (xfs_dir_leafblock_t *)tmpbuffer; - leaf_d = bp->data; - hdr_s = &leaf_s->hdr; - hdr_d = &leaf_d->hdr; - hdr_d->info = hdr_s->info; /* struct copy */ - INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize); - if (!hdr_d->firstused) - INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1); - hdr_d->namebytes = 0; - hdr_d->count = 0; - hdr_d->holes = 0; - INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t)); - INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); - - /* - * Copy all entry's in the same (sorted) order, - * but allocate filenames packed and in sequence. - * This changes the source (leaf_s) as well. - */ - xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp); - - if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave) - rval = XFS_ERROR(ENOSPC); - else - rval = 0; - - if (justcheck || rval == ENOSPC) { - ASSERT(tmpbuffer2); - memcpy(bp->data, tmpbuffer2, lbsize); - } else { - xfs_da_log_buf(trans, bp, 0, lbsize - 1); - } - - kmem_free(tmpbuffer, lbsize); - if (musthave || justcheck) - kmem_free(tmpbuffer2, lbsize); - return rval; -} - -/* - * Redistribute the directory entries between two leaf nodes, - * taking into account the size of the new entry. - * - * NOTE: if new block is empty, then it will get the upper half of old block. - */ -STATIC void -xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, - xfs_da_state_blk_t *blk2) -{ - xfs_da_state_blk_t *tmp_blk; - xfs_dir_leafblock_t *leaf1, *leaf2; - xfs_dir_leaf_hdr_t *hdr1, *hdr2; - int count, totallen, max, space, swap; - - /* - * Set up environment. - */ - ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC); - ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC); - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - - /* - * Check ordering of blocks, reverse if it makes things simpler. - */ - swap = 0; - if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) { - tmp_blk = blk1; - blk1 = blk2; - blk2 = tmp_blk; - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; - swap = 1; - } - hdr1 = &leaf1->hdr; - hdr2 = &leaf2->hdr; - - /* - * Examine entries until we reduce the absolute difference in - * byte usage between the two blocks to a minimum. Then get - * the direction to copy and the number of elements to move. - */ - state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2, - &count, &totallen); - if (swap) - state->inleaf = !state->inleaf; - - /* - * Move any entries required from leaf to leaf: - */ - if (count < INT_GET(hdr1->count, ARCH_CONVERT)) { - /* - * Figure the total bytes to be added to the destination leaf. - */ - count = INT_GET(hdr1->count, ARCH_CONVERT) - count; /* number entries being moved */ - space = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen; - space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1); - space += count * (uint)sizeof(xfs_dir_leaf_entry_t); - - /* - * leaf2 is the destination, compact it if it looks tight. - */ - max = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t); - max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t); - if (space > max) { - xfs_dir_leaf_compact(state->args->trans, blk2->bp, - 0, 0); - } - - /* - * Move high entries from leaf1 to low end of leaf2. - */ - xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count, - leaf2, 0, count, state->mp); - - xfs_da_log_buf(state->args->trans, blk1->bp, 0, - state->blocksize-1); - xfs_da_log_buf(state->args->trans, blk2->bp, 0, - state->blocksize-1); - - } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) { - /* - * Figure the total bytes to be added to the destination leaf. - */ - count -= INT_GET(hdr1->count, ARCH_CONVERT); /* number entries being moved */ - space = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT); - space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1); - space += count * (uint)sizeof(xfs_dir_leaf_entry_t); - - /* - * leaf1 is the destination, compact it if it looks tight. - */ - max = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t); - max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t); - if (space > max) { - xfs_dir_leaf_compact(state->args->trans, blk1->bp, - 0, 0); - } - - /* - * Move low entries from leaf2 to high end of leaf1. - */ - xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT), - count, state->mp); - - xfs_da_log_buf(state->args->trans, blk1->bp, 0, - state->blocksize-1); - xfs_da_log_buf(state->args->trans, blk2->bp, 0, - state->blocksize-1); - } - - /* - * Copy out last hashval in each block for B-tree code. - */ - blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); - blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); - - /* - * Adjust the expected index for insertion. - * GROT: this doesn't work unless blk2 was originally empty. - */ - if (!state->inleaf) { - blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT); - } -} - -/* - * Examine entries until we reduce the absolute difference in - * byte usage between the two blocks to a minimum. - * GROT: Is this really necessary? With other than a 512 byte blocksize, - * GROT: there will always be enough room in either block for a new entry. - * GROT: Do a double-split for this case? - */ -STATIC int -xfs_dir_leaf_figure_balance(xfs_da_state_t *state, - xfs_da_state_blk_t *blk1, - xfs_da_state_blk_t *blk2, - int *countarg, int *namebytesarg) -{ - xfs_dir_leafblock_t *leaf1, *leaf2; - xfs_dir_leaf_hdr_t *hdr1, *hdr2; - xfs_dir_leaf_entry_t *entry; - int count, max, totallen, half; - int lastdelta, foundit, tmp; - - /* - * Set up environment. - */ - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; - hdr1 = &leaf1->hdr; - hdr2 = &leaf2->hdr; - foundit = 0; - totallen = 0; - - /* - * Examine entries until we reduce the absolute difference in - * byte usage between the two blocks to a minimum. - */ - max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT); - half = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1); - half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen; - half /= 2; - lastdelta = state->blocksize; - entry = &leaf1->entries[0]; - for (count = 0; count < max; entry++, count++) { - -#define XFS_DIR_ABS(A) (((A) < 0) ? -(A) : (A)) - /* - * The new entry is in the first block, account for it. - */ - if (count == blk1->index) { - tmp = totallen + (uint)sizeof(*entry) - + XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen); - if (XFS_DIR_ABS(half - tmp) > lastdelta) - break; - lastdelta = XFS_DIR_ABS(half - tmp); - totallen = tmp; - foundit = 1; - } - - /* - * Wrap around into the second block if necessary. - */ - if (count == INT_GET(hdr1->count, ARCH_CONVERT)) { - leaf1 = leaf2; - entry = &leaf1->entries[0]; - } - - /* - * Figure out if next leaf entry would be too much. - */ - tmp = totallen + (uint)sizeof(*entry) - + XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry); - if (XFS_DIR_ABS(half - tmp) > lastdelta) - break; - lastdelta = XFS_DIR_ABS(half - tmp); - totallen = tmp; -#undef XFS_DIR_ABS - } - - /* - * Calculate the number of namebytes that will end up in lower block. - * If new entry not in lower block, fix up the count. - */ - totallen -= - count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1); - if (foundit) { - totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) + - state->args->namelen; - } - - *countarg = count; - *namebytesarg = totallen; - return foundit; -} - -/*======================================================================== - * Routines used for shrinking the Btree. - *========================================================================*/ - -/* - * Check a leaf block and its neighbors to see if the block should be - * collapsed into one or the other neighbor. Always keep the block - * with the smaller block number. - * If the current block is over 50% full, don't try to join it, return 0. - * If the block is empty, fill in the state structure and return 2. - * If it can be collapsed, fill in the state structure and return 1. - * If nothing can be done, return 0. - */ -int -xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) -{ - xfs_dir_leafblock_t *leaf; - xfs_da_state_blk_t *blk; - xfs_da_blkinfo_t *info; - int count, bytes, forward, error, retval, i; - xfs_dablk_t blkno; - xfs_dabuf_t *bp; - - /* - * Check for the degenerate case of the block being over 50% full. - * If so, it's not worth even looking to see if we might be able - * to coalesce with a sibling. - */ - blk = &state->path.blk[ state->path.active-1 ]; - info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DIR_LEAF_MAGIC); - leaf = (xfs_dir_leafblock_t *)info; - count = INT_GET(leaf->hdr.count, ARCH_CONVERT); - bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) + - count * (uint)sizeof(xfs_dir_leaf_entry_t) + - count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) + - INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); - if (bytes > (state->blocksize >> 1)) { - *action = 0; /* blk over 50%, don't try to join */ - return 0; - } - - /* - * Check for the degenerate case of the block being empty. - * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (arbitrarily) - * to merge with the forward block unless it is NULL. - */ - if (count == 0) { - /* - * Make altpath point to the block we want to keep and - * path point to the block we want to drop (this one). - */ - forward = (info->forw != 0); - memcpy(&state->altpath, &state->path, sizeof(state->path)); - error = xfs_da_path_shift(state, &state->altpath, forward, - 0, &retval); - if (error) - return error; - if (retval) { - *action = 0; - } else { - *action = 2; - } - return 0; - } - - /* - * Examine each sibling block to see if we can coalesce with - * at least 25% free space to spare. We need to figure out - * whether to merge with the forward or the backward block. - * We prefer coalescing with the lower numbered sibling so as - * to shrink a directory over time. - */ - forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); /* start with smaller blk num */ - for (i = 0; i < 2; forward = !forward, i++) { - if (forward) - blkno = be32_to_cpu(info->forw); - else - blkno = be32_to_cpu(info->back); - if (blkno == 0) - continue; - error = xfs_da_read_buf(state->args->trans, state->args->dp, - blkno, -1, &bp, - XFS_DATA_FORK); - if (error) - return error; - ASSERT(bp != NULL); - - leaf = (xfs_dir_leafblock_t *)info; - count = INT_GET(leaf->hdr.count, ARCH_CONVERT); - bytes = state->blocksize - (state->blocksize>>2); - bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - count += INT_GET(leaf->hdr.count, ARCH_CONVERT); - bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); - bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1); - bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t); - bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t); - if (bytes >= 0) - break; /* fits with at least 25% to spare */ - - xfs_da_brelse(state->args->trans, bp); - } - if (i >= 2) { - *action = 0; - return 0; - } - xfs_da_buf_done(bp); - - /* - * Make altpath point to the block we want to keep (the lower - * numbered block) and path point to the block we want to drop. - */ - memcpy(&state->altpath, &state->path, sizeof(state->path)); - if (blkno < blk->blkno) { - error = xfs_da_path_shift(state, &state->altpath, forward, - 0, &retval); - } else { - error = xfs_da_path_shift(state, &state->path, forward, - 0, &retval); - } - if (error) - return error; - if (retval) { - *action = 0; - } else { - *action = 1; - } - return 0; -} - -/* - * Remove a name from the leaf directory structure. - * - * Return 1 if leaf is less than 37% full, 0 if >= 37% full. - * If two leaves are 37% full, when combined they will leave 25% free. - */ -int -xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_hdr_t *hdr; - xfs_dir_leaf_map_t *map; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - int before, after, smallest, entsize; - int tablesize, tmp, i; - xfs_mount_t *mp; - - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - hdr = &leaf->hdr; - mp = trans->t_mountp; - ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); - ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT))); - ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr))); - entry = &leaf->entries[index]; - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT)); - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); - - /* - * Scan through free region table: - * check for adjacency of free'd entry with an existing one, - * find smallest free region in case we need to replace it, - * adjust any map that borders the entry table, - */ - tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t) - + (uint)sizeof(xfs_dir_leaf_hdr_t); - map = &hdr->freemap[0]; - tmp = INT_GET(map->size, ARCH_CONVERT); - before = after = -1; - smallest = XFS_DIR_LEAF_MAPSIZE - 1; - entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry); - for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) { - ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); - ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); - if (INT_GET(map->base, ARCH_CONVERT) == tablesize) { - INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t))); - INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t)); - } - - if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) { - before = i; - } else if (INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) { - after = i; - } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) { - tmp = INT_GET(map->size, ARCH_CONVERT); - smallest = i; - } - } - - /* - * Coalesce adjacent freemap regions, - * or replace the smallest region. - */ - if ((before >= 0) || (after >= 0)) { - if ((before >= 0) && (after >= 0)) { - map = &hdr->freemap[before]; - INT_MOD(map->size, ARCH_CONVERT, entsize); - INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT)); - hdr->freemap[after].base = 0; - hdr->freemap[after].size = 0; - } else if (before >= 0) { - map = &hdr->freemap[before]; - INT_MOD(map->size, ARCH_CONVERT, entsize); - } else { - map = &hdr->freemap[after]; - INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); - INT_MOD(map->size, ARCH_CONVERT, entsize); - } - } else { - /* - * Replace smallest region (if it is smaller than free'd entry) - */ - map = &hdr->freemap[smallest]; - if (INT_GET(map->size, ARCH_CONVERT) < entsize) { - INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); - INT_SET(map->size, ARCH_CONVERT, entsize); - } - } - - /* - * Did we remove the first entry? - */ - if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT)) - smallest = 1; - else - smallest = 0; - - /* - * Compress the remaining entries and zero out the removed stuff. - */ - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - memset((char *)namest, 0, entsize); - xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize)); - - INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen)); - tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t); - memmove(entry, entry + 1, tmp); - INT_MOD(hdr->count, ARCH_CONVERT, -1); - xfs_da_log_buf(trans, bp, - XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry))); - entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)]; - memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t)); - - /* - * If we removed the first entry, re-find the first used byte - * in the name area. Note that if the entry was the "firstused", - * then we don't have a "hole" in our block resulting from - * removing the name. - */ - if (smallest) { - tmp = XFS_LBSIZE(mp); - entry = &leaf->entries[0]; - for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) { - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT)); - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); - if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp) - tmp = INT_GET(entry->nameidx, ARCH_CONVERT); - } - INT_SET(hdr->firstused, ARCH_CONVERT, tmp); - if (!hdr->firstused) - INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1); - } else { - hdr->holes = 1; /* mark as needing compaction */ - } - - xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); - - /* - * Check if leaf is less than 50% full, caller may want to - * "join" the leaf with a sibling if so. - */ - tmp = (uint)sizeof(xfs_dir_leaf_hdr_t); - tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t); - tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1); - tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); - if (tmp < mp->m_dir_magicpct) - return 1; /* leaf is < 37% full */ - return 0; -} - -/* - * Move all the directory entries from drop_leaf into save_leaf. - */ -void -xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, - xfs_da_state_blk_t *save_blk) -{ - xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf; - xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr; - xfs_mount_t *mp; - char *tmpbuffer; - - /* - * Set up environment. - */ - mp = state->mp; - ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC); - ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC); - drop_leaf = drop_blk->bp->data; - save_leaf = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - drop_hdr = &drop_leaf->hdr; - save_hdr = &save_leaf->hdr; - - /* - * Save last hashval from dying block for later Btree fixup. - */ - drop_blk->hashval = INT_GET(drop_leaf->entries[ drop_leaf->hdr.count-1 ].hashval, ARCH_CONVERT); - - /* - * Check if we need a temp buffer, or can we do it in place. - * Note that we don't check "leaf" for holes because we will - * always be dropping it, toosmall() decided that for us already. - */ - if (save_hdr->holes == 0) { - /* - * dest leaf has no holes, so we add there. May need - * to make some room in the entry array. - */ - if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) { - xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0, - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); - } else { - xfs_dir_leaf_moveents(drop_leaf, 0, - save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT), - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); - } - } else { - /* - * Destination has holes, so we make a temporary copy - * of the leaf and add them both to that. - */ - tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP); - ASSERT(tmpbuffer != NULL); - memset(tmpbuffer, 0, state->blocksize); - tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer; - tmp_hdr = &tmp_leaf->hdr; - tmp_hdr->info = save_hdr->info; /* struct copy */ - tmp_hdr->count = 0; - INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize); - if (!tmp_hdr->firstused) - INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1); - tmp_hdr->namebytes = 0; - if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) { - xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0, - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); - xfs_dir_leaf_moveents(save_leaf, 0, - tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), - (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp); - } else { - xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0, - (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp); - xfs_dir_leaf_moveents(drop_leaf, 0, - tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); - } - memcpy(save_leaf, tmp_leaf, state->blocksize); - kmem_free(tmpbuffer, state->blocksize); - } - - xfs_da_log_buf(state->args->trans, save_blk->bp, 0, - state->blocksize - 1); - - /* - * Copy out last hashval in each block for B-tree code. - */ - save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); -} - -/*======================================================================== - * Routines used for finding things in the Btree. - *========================================================================*/ - -/* - * Look up a name in a leaf directory structure. - * This is the internal routine, it uses the caller's buffer. - * - * Note that duplicate keys are allowed, but only check within the - * current leaf node. The Btree code must check in adjacent leaf nodes. - * - * Return in *index the index into the entry[] array of either the found - * entry, or where the entry should have been (insert before that entry). - * - * Don't change the args->inumber unless we find the filename. - */ -int -xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - int probe, span; - xfs_dahash_t hashval; - - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8)); - - /* - * Binary search. (note: small blocks will skip this loop) - */ - hashval = args->hashval; - probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2; - for (entry = &leaf->entries[probe]; span > 4; - entry = &leaf->entries[probe]) { - span /= 2; - if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval) - probe += span; - else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval) - probe -= span; - else - break; - } - ASSERT((probe >= 0) && \ - ((!leaf->hdr.count) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)))); - ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)); - - /* - * Since we may have duplicate hashval's, find the first matching - * hashval in the leaf. - */ - while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) { - entry--; - probe--; - } - while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) { - entry++; - probe++; - } - if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) { - *index = probe; - ASSERT(args->oknoent); - return XFS_ERROR(ENOENT); - } - - /* - * Duplicate keys may be present, so search all of them for a match. - */ - while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) { - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); - if (entry->namelen == args->namelen && - namest->name[0] == args->name[0] && - memcmp(args->name, namest->name, args->namelen) == 0) { - XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber); - *index = probe; - return XFS_ERROR(EEXIST); - } - entry++; - probe++; - } - *index = probe; - ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent); - return XFS_ERROR(ENOENT); -} - -/*======================================================================== - * Utility routines. - *========================================================================*/ - -/* - * Move the indicated entries from one leaf to another. - * NOTE: this routine modifies both source and destination leaves. - */ -/* ARGSUSED */ -STATIC void -xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s, - xfs_dir_leafblock_t *leaf_d, int start_d, - int count, xfs_mount_t *mp) -{ - xfs_dir_leaf_hdr_t *hdr_s, *hdr_d; - xfs_dir_leaf_entry_t *entry_s, *entry_d; - int tmp, i; - - /* - * Check for nothing to do. - */ - if (count == 0) - return; - - /* - * Set up environment. - */ - ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - hdr_s = &leaf_s->hdr; - hdr_d = &leaf_d->hdr; - ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); - ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >= - ((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s))); - ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)); - ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= - ((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d))); - - ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT)); - ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT)); - ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT)); - - /* - * Move the entries in the destination leaf up to make a hole? - */ - if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) { - tmp = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d; - tmp *= (uint)sizeof(xfs_dir_leaf_entry_t); - entry_s = &leaf_d->entries[start_d]; - entry_d = &leaf_d->entries[start_d + count]; - memcpy(entry_d, entry_s, tmp); - } - - /* - * Copy all entry's in the same (sorted) order, - * but allocate filenames packed and in sequence. - */ - entry_s = &leaf_s->entries[start_s]; - entry_d = &leaf_d->entries[start_d]; - for (i = 0; i < count; entry_s++, entry_d++, i++) { - ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT)); - tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s); - INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp)); - entry_d->hashval = entry_s->hashval; /* INT_: direct copy */ - INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT); - entry_d->namelen = entry_s->namelen; - ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp)); - memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)), - XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), tmp); - ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp)); - memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), - 0, tmp); - INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen)); - INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen); - INT_MOD(hdr_s->count, ARCH_CONVERT, -1); - INT_MOD(hdr_d->count, ARCH_CONVERT, +1); - tmp = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t) - + (uint)sizeof(xfs_dir_leaf_hdr_t); - ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp); - - } - - /* - * Zero out the entries we just copied. - */ - if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) { - tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t); - entry_s = &leaf_s->entries[start_s]; - ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp)); - memset((char *)entry_s, 0, tmp); - } else { - /* - * Move the remaining entries down to fill the hole, - * then zero the entries at the top. - */ - tmp = INT_GET(hdr_s->count, ARCH_CONVERT) - count; - tmp *= (uint)sizeof(xfs_dir_leaf_entry_t); - entry_s = &leaf_s->entries[start_s + count]; - entry_d = &leaf_s->entries[start_s]; - memcpy(entry_d, entry_s, tmp); - - tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t); - entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)]; - ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp)); - memset((char *)entry_s, 0, tmp); - } - - /* - * Fill in the freemap information - */ - INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t)); - INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)); - INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); - INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, (hdr_d->freemap[2].base = 0)); - INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, (hdr_d->freemap[2].size = 0)); - hdr_s->holes = 1; /* leaf may not be compact */ -} - -/* - * Compare two leaf blocks "order". - */ -int -xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) -{ - xfs_dir_leafblock_t *leaf1, *leaf2; - - leaf1 = leaf1_bp->data; - leaf2 = leaf2_bp->data; - ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC) && - (be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC)); - if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) && - ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) < - INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) || - (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < - INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { - return 1; - } - return 0; -} - -/* - * Pick up the last hashvalue from a leaf block. - */ -xfs_dahash_t -xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count) -{ - xfs_dir_leafblock_t *leaf; - - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - if (count) - *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); - if (!leaf->hdr.count) - return(0); - return(INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)); -} - -/* - * Copy out directory entries for getdents(), for leaf directories. - */ -int -xfs_dir_leaf_getdents_int( - xfs_dabuf_t *bp, - xfs_inode_t *dp, - xfs_dablk_t bno, - uio_t *uio, - int *eobp, - xfs_dirent_t *dbp, - xfs_dir_put_t put, - xfs_daddr_t nextda) -{ - xfs_dir_leafblock_t *leaf; - xfs_dir_leaf_entry_t *entry; - xfs_dir_leaf_name_t *namest; - int entno, want_entno, i, nextentno; - xfs_mount_t *mp; - xfs_dahash_t cookhash; - xfs_dahash_t nexthash = 0; -#if (BITS_PER_LONG == 32) - xfs_dahash_t lasthash = XFS_DA_MAXHASH; -#endif - xfs_dir_put_args_t p; - - mp = dp->i_mount; - leaf = bp->data; - if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) { - *eobp = 1; - return XFS_ERROR(ENOENT); /* XXX wrong code */ - } - - want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset); - - cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset); - - xfs_dir_trace_g_dul("leaf: start", dp, uio, leaf); - - /* - * Re-find our place. - */ - for (i = entno = 0, entry = &leaf->entries[0]; - i < INT_GET(leaf->hdr.count, ARCH_CONVERT); - entry++, i++) { - - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, - INT_GET(entry->nameidx, ARCH_CONVERT)); - - if (unlikely( - ((char *)namest < (char *)leaf) || - ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) { - XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(1)", - XFS_ERRLEVEL_LOW, mp, leaf); - xfs_dir_trace_g_du("leaf: corrupted", dp, uio); - return XFS_ERROR(EFSCORRUPTED); - } - if (INT_GET(entry->hashval, ARCH_CONVERT) >= cookhash) { - if ( entno < want_entno - && INT_GET(entry->hashval, ARCH_CONVERT) - == cookhash) { - /* - * Trying to get to a particular offset in a - * run of equal-hashval entries. - */ - entno++; - } else if ( want_entno > 0 - && entno == want_entno - && INT_GET(entry->hashval, ARCH_CONVERT) - == cookhash) { - break; - } else { - entno = 0; - break; - } - } - } - - if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) { - xfs_dir_trace_g_du("leaf: hash not found", dp, uio); - if (!leaf->hdr.info.forw) - uio->uio_offset = - XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH); - /* - * Don't set uio_offset if there's another block: - * the node code will be setting uio_offset anyway. - */ - *eobp = 0; - return 0; - } - xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry); - - p.dbp = dbp; - p.put = put; - p.uio = uio; - - /* - * We're synchronized, start copying entries out to the user. - */ - for (; entno >= 0 && i < INT_GET(leaf->hdr.count, ARCH_CONVERT); - entry++, i++, (entno = nextentno)) { - int lastresid=0, retval; - xfs_dircook_t lastoffset; - xfs_dahash_t thishash; - - /* - * Check for a damaged directory leaf block and pick up - * the inode number from this entry. - */ - namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, - INT_GET(entry->nameidx, ARCH_CONVERT)); - - if (unlikely( - ((char *)namest < (char *)leaf) || - ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) { - XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(2)", - XFS_ERRLEVEL_LOW, mp, leaf); - xfs_dir_trace_g_du("leaf: corrupted", dp, uio); - return XFS_ERROR(EFSCORRUPTED); - } - - xfs_dir_trace_g_duc("leaf: middle cookie ", - dp, uio, p.cook.o); - - if (i < (INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1)) { - nexthash = INT_GET(entry[1].hashval, ARCH_CONVERT); - - if (nexthash == INT_GET(entry->hashval, ARCH_CONVERT)) - nextentno = entno + 1; - else - nextentno = 0; - XFS_PUT_COOKIE(p.cook, mp, bno, nextentno, nexthash); - xfs_dir_trace_g_duc("leaf: middle cookie ", - dp, uio, p.cook.o); - - } else if ((thishash = be32_to_cpu(leaf->hdr.info.forw))) { - xfs_dabuf_t *bp2; - xfs_dir_leafblock_t *leaf2; - - ASSERT(nextda != -1); - - retval = xfs_da_read_buf(dp->i_transp, dp, thishash, - nextda, &bp2, XFS_DATA_FORK); - if (retval) - return retval; - - ASSERT(bp2 != NULL); - - leaf2 = bp2->data; - - if (unlikely( - (be16_to_cpu(leaf2->hdr.info.magic) - != XFS_DIR_LEAF_MAGIC) - || (be32_to_cpu(leaf2->hdr.info.back) - != bno))) { /* GROT */ - XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)", - XFS_ERRLEVEL_LOW, mp, - leaf2); - xfs_da_brelse(dp->i_transp, bp2); - - return XFS_ERROR(EFSCORRUPTED); - } - - nexthash = INT_GET(leaf2->entries[0].hashval, - ARCH_CONVERT); - nextentno = -1; - XFS_PUT_COOKIE(p.cook, mp, thishash, 0, nexthash); - xfs_da_brelse(dp->i_transp, bp2); - xfs_dir_trace_g_duc("leaf: next blk cookie", - dp, uio, p.cook.o); - } else { - nextentno = -1; - XFS_PUT_COOKIE(p.cook, mp, 0, 0, XFS_DA_MAXHASH); - } - - /* - * Save off the cookie so we can fall back should the - * 'put' into the outgoing buffer fails. To handle a run - * of equal-hashvals, the off_t structure on 64bit - * builds has entno built into the cookie to ID the - * entry. On 32bit builds, we only have space for the - * hashval so we can't ID specific entries within a group - * of same hashval entries. For this, lastoffset is set - * to the first in the run of equal hashvals so we don't - * include any entries unless we can include all entries - * that share the same hashval. Hopefully the buffer - * provided is big enough to handle it (see pv763517). - */ -#if (BITS_PER_LONG == 32) - if ((thishash = INT_GET(entry->hashval, ARCH_CONVERT)) - != lasthash) { - XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash); - lastresid = uio->uio_resid; - lasthash = thishash; - } else { - xfs_dir_trace_g_duc("leaf: DUP COOKIES, skipped", - dp, uio, p.cook.o); - } -#else - thishash = INT_GET(entry->hashval, ARCH_CONVERT); - XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash); - lastresid = uio->uio_resid; -#endif /* BITS_PER_LONG == 32 */ - - /* - * Put the current entry into the outgoing buffer. If we fail - * then restore the UIO to the first entry in the current - * run of equal-hashval entries (probably one 1 entry long). - */ - p.ino = XFS_GET_DIR_INO8(namest->inumber); -#if XFS_BIG_INUMS - p.ino += mp->m_inoadd; -#endif - p.name = (char *)namest->name; - p.namelen = entry->namelen; - - retval = p.put(&p); - - if (!p.done) { - uio->uio_offset = lastoffset.o; - uio->uio_resid = lastresid; - - *eobp = 1; - - xfs_dir_trace_g_du("leaf: E-O-B", dp, uio); - - return retval; - } - } - - uio->uio_offset = p.cook.o; - - *eobp = 0; - - xfs_dir_trace_g_du("leaf: E-O-F", dp, uio); - - return 0; -} - -/* - * Format a dirent64 structure and copy it out the the user's buffer. - */ -int -xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa) -{ - iovec_t *iovp; - int reclen, namelen; - xfs_dirent_t *idbp; - uio_t *uio; - - namelen = pa->namelen; - reclen = DIRENTSIZE(namelen); - uio = pa->uio; - if (reclen > uio->uio_resid) { - pa->done = 0; - return 0; - } - iovp = uio->uio_iov; - idbp = (xfs_dirent_t *)iovp->iov_base; - iovp->iov_base = (char *)idbp + reclen; - iovp->iov_len -= reclen; - uio->uio_resid -= reclen; - idbp->d_reclen = reclen; - idbp->d_ino = pa->ino; - idbp->d_off = pa->cook.o; - idbp->d_name[namelen] = '\0'; - pa->done = 1; - memcpy(idbp->d_name, pa->name, namelen); - return 0; -} - -/* - * Format a dirent64 structure and copy it out the the user's buffer. - */ -int -xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa) -{ - int retval, reclen, namelen; - xfs_dirent_t *idbp; - uio_t *uio; - - namelen = pa->namelen; - reclen = DIRENTSIZE(namelen); - uio = pa->uio; - if (reclen > uio->uio_resid) { - pa->done = 0; - return 0; - } - idbp = pa->dbp; - idbp->d_reclen = reclen; - idbp->d_ino = pa->ino; - idbp->d_off = pa->cook.o; - idbp->d_name[namelen] = '\0'; - memcpy(idbp->d_name, pa->name, namelen); - retval = uio_read((caddr_t)idbp, reclen, uio); - pa->done = (retval == 0); - return retval; -} diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h deleted file mode 100644 index eb8cd9a4667f..000000000000 --- a/fs/xfs/xfs_dir_leaf.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR_LEAF_H__ -#define __XFS_DIR_LEAF_H__ - -/* - * Directory layout, internal structure, access macros, etc. - * - * Large directories are structured around Btrees where all the data - * elements are in the leaf nodes. Filenames are hashed into an int, - * then that int is used as the index into the Btree. Since the hashval - * of a filename may not be unique, we may have duplicate keys. The - * internal links in the Btree are logical block offsets into the file. - */ - -struct uio; -struct xfs_bmap_free; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_da_state; -struct xfs_da_state_blk; -struct xfs_dir_put_args; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/*======================================================================== - * Directory Structure when equal to XFS_LBSIZE(mp) bytes. - *========================================================================*/ - -/* - * This is the structure of the leaf nodes in the Btree. - * - * Struct leaf_entry's are packed from the top. Names grow from the bottom - * but are not packed. The freemap contains run-length-encoded entries - * for the free bytes after the leaf_entry's, but only the N largest such, - * smaller runs are dropped. When the freemap doesn't show enough space - * for an allocation, we compact the namelist area and try again. If we - * still don't have enough space, then we have to split the block. - * - * Since we have duplicate hash keys, for each key that matches, compare - * the actual string. The root and intermediate node search always takes - * the first-in-the-block key match found, so we should only have to work - * "forw"ard. If none matches, continue with the "forw"ard leaf nodes - * until the hash key changes or the filename is found. - * - * The parent directory and the self-pointer are explicitly represented - * (ie: there are entries for "." and ".."). - * - * Note that the count being a __uint16_t limits us to something like a - * blocksize of 1.3MB in the face of worst case (short) filenames. - */ -#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */ - -typedef struct xfs_dir_leaf_map { /* RLE map of free bytes */ - __uint16_t base; /* base of free region */ - __uint16_t size; /* run length of free region */ -} xfs_dir_leaf_map_t; - -typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */ - xfs_da_blkinfo_t info; /* block type, links, etc. */ - __uint16_t count; /* count of active leaf_entry's */ - __uint16_t namebytes; /* num bytes of name strings stored */ - __uint16_t firstused; /* first used byte in name area */ - __uint8_t holes; /* != 0 if blk needs compaction */ - __uint8_t pad1; - xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE]; -} xfs_dir_leaf_hdr_t; - -typedef struct xfs_dir_leaf_entry { /* sorted on key, not name */ - xfs_dahash_t hashval; /* hash value of name */ - __uint16_t nameidx; /* index into buffer of name */ - __uint8_t namelen; /* length of name string */ - __uint8_t pad2; -} xfs_dir_leaf_entry_t; - -typedef struct xfs_dir_leaf_name { - xfs_dir_ino_t inumber; /* inode number for this key */ - __uint8_t name[1]; /* name string itself */ -} xfs_dir_leaf_name_t; - -typedef struct xfs_dir_leafblock { - xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */ - xfs_dir_leaf_entry_t entries[1]; /* var sized array */ - xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */ -} xfs_dir_leafblock_t; - -/* - * Length of name for which a 512-byte block filesystem - * can get a double split. - */ -#define XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN \ - (512 - (uint)sizeof(xfs_dir_leaf_hdr_t) - \ - (uint)sizeof(xfs_dir_leaf_entry_t) * 2 - \ - (uint)sizeof(xfs_dir_leaf_name_t) * 2 - (MAXNAMELEN - 2) + 1 + 1) - -typedef int (*xfs_dir_put_t)(struct xfs_dir_put_args *pa); - -typedef union { - xfs_off_t o; /* offset (cookie) */ - /* - * Watch the order here (endian-ness dependent). - */ - struct { -#ifndef XFS_NATIVE_HOST - xfs_dahash_t h; /* hash value */ - __uint32_t be; /* block and entry */ -#else - __uint32_t be; /* block and entry */ - xfs_dahash_t h; /* hash value */ -#endif /* XFS_NATIVE_HOST */ - } s; -} xfs_dircook_t; - -#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \ - ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash)) - -typedef struct xfs_dir_put_args { - xfs_dircook_t cook; /* cookie of (next) entry */ - xfs_intino_t ino; /* inode number */ - struct xfs_dirent *dbp; /* buffer pointer */ - char *name; /* directory entry name */ - int namelen; /* length of name */ - int done; /* output: set if value was stored */ - xfs_dir_put_t put; /* put function ptr (i/o) */ - struct uio *uio; /* uio control structure */ -} xfs_dir_put_args_t; - -#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \ - xfs_dir_leaf_entsize_byname(len) -static inline int xfs_dir_leaf_entsize_byname(int len) -{ - return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len; -} - -#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry) \ - xfs_dir_leaf_entsize_byentry(entry) -static inline int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry) -{ - return (uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen; -} - -#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset) \ - xfs_dir_leaf_namestruct(leafp,offset) -static inline xfs_dir_leaf_name_t * -xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset) -{ - return (xfs_dir_leaf_name_t *)&((char *)(leafp))[offset]; -} - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -/* - * Internal routines when dirsize < XFS_LITINO(mp). - */ -int xfs_dir_shortform_create(struct xfs_da_args *args, xfs_ino_t parent); -int xfs_dir_shortform_addname(struct xfs_da_args *args); -int xfs_dir_shortform_lookup(struct xfs_da_args *args); -int xfs_dir_shortform_to_leaf(struct xfs_da_args *args); -int xfs_dir_shortform_removename(struct xfs_da_args *args); -int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp, - struct xfs_dirent *dbp, xfs_dir_put_t put); -int xfs_dir_shortform_replace(struct xfs_da_args *args); - -/* - * Internal routines when dirsize == XFS_LBSIZE(mp). - */ -int xfs_dir_leaf_to_node(struct xfs_da_args *args); -int xfs_dir_leaf_to_shortform(struct xfs_da_args *args); - -/* - * Routines used for growing the Btree. - */ -int xfs_dir_leaf_split(struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, - struct xfs_da_state_blk *newblk); -int xfs_dir_leaf_add(struct xfs_dabuf *leaf_buffer, - struct xfs_da_args *args, int insertion_index); -int xfs_dir_leaf_addname(struct xfs_da_args *args); -int xfs_dir_leaf_lookup_int(struct xfs_dabuf *leaf_buffer, - struct xfs_da_args *args, - int *index_found_at); -int xfs_dir_leaf_remove(struct xfs_trans *trans, - struct xfs_dabuf *leaf_buffer, - int index_to_remove); -int xfs_dir_leaf_getdents_int(struct xfs_dabuf *bp, struct xfs_inode *dp, - xfs_dablk_t bno, struct uio *uio, - int *eobp, struct xfs_dirent *dbp, - xfs_dir_put_t put, xfs_daddr_t nextda); - -/* - * Routines used for shrinking the Btree. - */ -int xfs_dir_leaf_toosmall(struct xfs_da_state *state, int *retval); -void xfs_dir_leaf_unbalance(struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk); - -/* - * Utility routines. - */ -uint xfs_dir_leaf_lasthash(struct xfs_dabuf *bp, int *count); -int xfs_dir_leaf_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); -int xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa); -int xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa); -int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); - -/* - * Global data. - */ -extern xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; - -#endif /* __XFS_DIR_LEAF_H__ */ diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h deleted file mode 100644 index 5b20b4d3f57d..000000000000 --- a/fs/xfs/xfs_dir_sf.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR_SF_H__ -#define __XFS_DIR_SF_H__ - -/* - * Directory layout when stored internal to an inode. - * - * Small directories are packed as tightly as possible so as to - * fit into the literal area of the inode. - */ - -typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t; - -/* - * The parent directory has a dedicated field, and the self-pointer must - * be calculated on the fly. - * - * Entries are packed toward the top as tight as possible. The header - * and the elements much be memcpy'd out into a work area to get correct - * alignment for the inode number fields. - */ -typedef struct xfs_dir_sf_hdr { /* constant-structure header block */ - xfs_dir_ino_t parent; /* parent dir inode number */ - __uint8_t count; /* count of active entries */ -} xfs_dir_sf_hdr_t; - -typedef struct xfs_dir_sf_entry { - xfs_dir_ino_t inumber; /* referenced inode number */ - __uint8_t namelen; /* actual length of name (no NULL) */ - __uint8_t name[1]; /* name */ -} xfs_dir_sf_entry_t; - -typedef struct xfs_dir_shortform { - xfs_dir_sf_hdr_t hdr; - xfs_dir_sf_entry_t list[1]; /* variable sized array */ -} xfs_dir_shortform_t; - -/* - * We generate this then sort it, so that readdirs are returned in - * hash-order. Else seekdir won't work. - */ -typedef struct xfs_dir_sf_sort { - __uint8_t entno; /* .=0, ..=1, else entry# + 2 */ - __uint8_t seqno; /* sequence # with same hash value */ - __uint8_t namelen; /* length of name value (no null) */ - xfs_dahash_t hash; /* this entry's hash value */ - xfs_intino_t ino; /* this entry's inode number */ - char *name; /* name value, pointer into buffer */ -} xfs_dir_sf_sort_t; - -#define XFS_DIR_SF_GET_DIRINO(from,to) xfs_dir_sf_get_dirino(from, to) -static inline void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to) -{ - *(to) = XFS_GET_DIR_INO8(*from); -} - -#define XFS_DIR_SF_PUT_DIRINO(from,to) xfs_dir_sf_put_dirino(from, to) -static inline void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to) -{ - XFS_PUT_DIR_INO8(*(from), *(to)); -} - -#define XFS_DIR_SF_ENTSIZE_BYNAME(len) xfs_dir_sf_entsize_byname(len) -static inline int xfs_dir_sf_entsize_byname(int len) -{ - return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (len); -} - -#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep) xfs_dir_sf_entsize_byentry(sfep) -static inline int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep) -{ - return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen; -} - -#define XFS_DIR_SF_NEXTENTRY(sfep) xfs_dir_sf_nextentry(sfep) -static inline xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep) -{ - return (xfs_dir_sf_entry_t *) \ - ((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)); -} - -#define XFS_DIR_SF_ALLFIT(count,totallen) \ - xfs_dir_sf_allfit(count,totallen) -static inline int xfs_dir_sf_allfit(int count, int totallen) -{ - return ((uint)sizeof(xfs_dir_sf_hdr_t) + \ - ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen)); -} - -#if defined(XFS_DIR_TRACE) - -/* - * Kernel tracing support for directories. - */ -struct uio; -struct xfs_inode; -struct xfs_da_intnode; -struct xfs_dinode; -struct xfs_dir_leafblock; -struct xfs_dir_leaf_entry; - -#define XFS_DIR_TRACE_SIZE 4096 /* size of global trace buffer */ -extern ktrace_t *xfs_dir_trace_buf; - -/* - * Trace record types. - */ -#define XFS_DIR_KTRACE_G_DU 1 /* dp, uio */ -#define XFS_DIR_KTRACE_G_DUB 2 /* dp, uio, bno */ -#define XFS_DIR_KTRACE_G_DUN 3 /* dp, uio, node */ -#define XFS_DIR_KTRACE_G_DUL 4 /* dp, uio, leaf */ -#define XFS_DIR_KTRACE_G_DUE 5 /* dp, uio, leaf entry */ -#define XFS_DIR_KTRACE_G_DUC 6 /* dp, uio, cookie */ - -void xfs_dir_trace_g_du(char *where, struct xfs_inode *dp, struct uio *uio); -void xfs_dir_trace_g_dub(char *where, struct xfs_inode *dp, struct uio *uio, - xfs_dablk_t bno); -void xfs_dir_trace_g_dun(char *where, struct xfs_inode *dp, struct uio *uio, - struct xfs_da_intnode *node); -void xfs_dir_trace_g_dul(char *where, struct xfs_inode *dp, struct uio *uio, - struct xfs_dir_leafblock *leaf); -void xfs_dir_trace_g_due(char *where, struct xfs_inode *dp, struct uio *uio, - struct xfs_dir_leaf_entry *entry); -void xfs_dir_trace_g_duc(char *where, struct xfs_inode *dp, struct uio *uio, - xfs_off_t cookie); -void xfs_dir_trace_enter(int type, char *where, - void *a0, void *a1, void *a2, void *a3, - void *a4, void *a5, void *a6, void *a7, - void *a8, void *a9, void *a10, void *a11); -#else -#define xfs_dir_trace_g_du(w,d,u) -#define xfs_dir_trace_g_dub(w,d,u,b) -#define xfs_dir_trace_g_dun(w,d,u,n) -#define xfs_dir_trace_g_dul(w,d,u,l) -#define xfs_dir_trace_g_due(w,d,u,e) -#define xfs_dir_trace_g_duc(w,d,u,c) -#endif /* DEBUG */ - -#endif /* __XFS_DIR_SF_H__ */ diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 00b1540f8108..4e7865ad6f0e 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -189,6 +189,6 @@ typedef enum { #define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) -extern struct bhv_vfsops xfs_dmops; +extern struct bhv_module_vfsops xfs_dmops; #endif /* __XFS_DMAPI_H__ */ diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c index 629795b3b3d5..1e4a35ddf7f9 100644 --- a/fs/xfs/xfs_dmops.c +++ b/fs/xfs/xfs_dmops.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 2a21c5024017..b95681b03d81 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -22,12 +22,10 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f19282ec8549..6cf6d8769b97 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" @@ -294,6 +293,62 @@ xfs_efi_init(xfs_mount_t *mp, } /* + * Copy an EFI format buffer from the given buf, and into the destination + * EFI format structure. + * The given buffer can be in 32 bit or 64 bit form (which has different padding), + * one of which will be the native format for this kernel. + * It will handle the conversion of formats if necessary. + */ +int +xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) +{ + xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr; + uint i; + uint len = sizeof(xfs_efi_log_format_t) + + (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); + uint len32 = sizeof(xfs_efi_log_format_32_t) + + (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t); + uint len64 = sizeof(xfs_efi_log_format_64_t) + + (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t); + + if (buf->i_len == len) { + memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); + return 0; + } else if (buf->i_len == len32) { + xfs_efi_log_format_32_t *src_efi_fmt_32 = + (xfs_efi_log_format_32_t *)buf->i_addr; + + dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; + dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; + dst_efi_fmt->efi_nextents = src_efi_fmt_32->efi_nextents; + dst_efi_fmt->efi_id = src_efi_fmt_32->efi_id; + for (i = 0; i < dst_efi_fmt->efi_nextents; i++) { + dst_efi_fmt->efi_extents[i].ext_start = + src_efi_fmt_32->efi_extents[i].ext_start; + dst_efi_fmt->efi_extents[i].ext_len = + src_efi_fmt_32->efi_extents[i].ext_len; + } + return 0; + } else if (buf->i_len == len64) { + xfs_efi_log_format_64_t *src_efi_fmt_64 = + (xfs_efi_log_format_64_t *)buf->i_addr; + + dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; + dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; + dst_efi_fmt->efi_nextents = src_efi_fmt_64->efi_nextents; + dst_efi_fmt->efi_id = src_efi_fmt_64->efi_id; + for (i = 0; i < dst_efi_fmt->efi_nextents; i++) { + dst_efi_fmt->efi_extents[i].ext_start = + src_efi_fmt_64->efi_extents[i].ext_start; + dst_efi_fmt->efi_extents[i].ext_len = + src_efi_fmt_64->efi_extents[i].ext_len; + } + return 0; + } + return EFSCORRUPTED; +} + +/* * This is called by the efd item code below to release references to * the given efi item. Each efd calls this with the number of * extents that it has logged, and when the sum of these reaches diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 5bf681708fec..0ea45edaab03 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -27,6 +27,24 @@ typedef struct xfs_extent { } xfs_extent_t; /* + * Since an xfs_extent_t has types (start:64, len: 32) + * there are different alignments on 32 bit and 64 bit kernels. + * So we provide the different variants for use by a + * conversion routine. + */ + +typedef struct xfs_extent_32 { + xfs_dfsbno_t ext_start; + xfs_extlen_t ext_len; +} __attribute__((packed)) xfs_extent_32_t; + +typedef struct xfs_extent_64 { + xfs_dfsbno_t ext_start; + xfs_extlen_t ext_len; + __uint32_t ext_pad; +} xfs_extent_64_t; + +/* * This is the structure used to lay out an efi log item in the * log. The efi_extents field is a variable size array whose * size is given by efi_nextents. @@ -39,6 +57,22 @@ typedef struct xfs_efi_log_format { xfs_extent_t efi_extents[1]; /* array of extents to free */ } xfs_efi_log_format_t; +typedef struct xfs_efi_log_format_32 { + unsigned short efi_type; /* efi log item type */ + unsigned short efi_size; /* size of this item */ + uint efi_nextents; /* # extents to free */ + __uint64_t efi_id; /* efi identifier */ + xfs_extent_32_t efi_extents[1]; /* array of extents to free */ +} __attribute__((packed)) xfs_efi_log_format_32_t; + +typedef struct xfs_efi_log_format_64 { + unsigned short efi_type; /* efi log item type */ + unsigned short efi_size; /* size of this item */ + uint efi_nextents; /* # extents to free */ + __uint64_t efi_id; /* efi identifier */ + xfs_extent_64_t efi_extents[1]; /* array of extents to free */ +} xfs_efi_log_format_64_t; + /* * This is the structure used to lay out an efd log item in the * log. The efd_extents array is a variable size array whose @@ -52,6 +86,22 @@ typedef struct xfs_efd_log_format { xfs_extent_t efd_extents[1]; /* array of extents freed */ } xfs_efd_log_format_t; +typedef struct xfs_efd_log_format_32 { + unsigned short efd_type; /* efd log item type */ + unsigned short efd_size; /* size of this item */ + uint efd_nextents; /* # of extents freed */ + __uint64_t efd_efi_id; /* id of corresponding efi */ + xfs_extent_32_t efd_extents[1]; /* array of extents freed */ +} __attribute__((packed)) xfs_efd_log_format_32_t; + +typedef struct xfs_efd_log_format_64 { + unsigned short efd_type; /* efd log item type */ + unsigned short efd_size; /* size of this item */ + uint efd_nextents; /* # of extents freed */ + __uint64_t efd_efi_id; /* id of corresponding efi */ + xfs_extent_64_t efd_extents[1]; /* array of extents freed */ +} xfs_efd_log_format_64_t; + #ifdef __KERNEL__ @@ -103,7 +153,8 @@ extern struct kmem_zone *xfs_efd_zone; xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, uint); - +int xfs_efi_copy_format(xfs_log_iovec_t *buf, + xfs_efi_log_format_t *dst_efi_fmt); void xfs_efi_item_free(xfs_efi_log_item_t *); #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 14010f1fa82f..0f0ad1535951 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -67,14 +67,15 @@ struct fsxattr { #define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ #define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* * Structure for XFS_IOC_GETBMAP. * On input, fill in bmv_offset and bmv_length of the first structure - * to indicate the area of interest in the file, and bmv_entry with the - * number of array elements given. The first structure is updated on - * return to give the offset and length for the next call. + * to indicate the area of interest in the file, and bmv_entries with + * the number of array elements given back. The first structure is + * updated on return to give the offset and length for the next call. */ #ifndef HAVE_GETBMAP struct getbmap { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index dfa3527b20a7..077629bab532 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -542,14 +540,13 @@ xfs_reserve_blocks( } void -xfs_fs_log_dummy(xfs_mount_t *mp) +xfs_fs_log_dummy( + xfs_mount_t *mp) { - xfs_trans_t *tp; - xfs_inode_t *ip; - + xfs_trans_t *tp; + xfs_inode_t *ip; tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - atomic_inc(&mp->m_active_trans); if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) { xfs_trans_cancel(tp, 0); return; @@ -574,21 +571,22 @@ xfs_fs_goingdown( { switch (inflags) { case XFS_FSOP_GOING_FLAGS_DEFAULT: { - struct vfs *vfsp = XFS_MTOVFS(mp); + struct bhv_vfs *vfsp = XFS_MTOVFS(mp); struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev); if (sb && !IS_ERR(sb)) { - xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); + xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); thaw_bdev(sb->s_bdev, sb); } break; } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: - xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); + xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); break; case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH: - xfs_force_shutdown(mp, XFS_FORCE_UMOUNT|XFS_LOG_IO_ERROR); + xfs_force_shutdown(mp, + SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); break; default: return XFS_ERROR(EINVAL); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index deddbd03c166..33164a85aa9d 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -1174,6 +1172,9 @@ xfs_dilocate( if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || ino != XFS_AGINO_TO_INO(mp, agno, agino)) { #ifdef DEBUG + /* no diagnostics for bulkstat, ino comes from userspace */ + if (flags & XFS_IMAP_BULKSTAT) + return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: agno (%d) >= " diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 60c65683462d..616eeeb6953e 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b53854325266..0724df7fabb7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -186,7 +184,7 @@ xfs_ihash_promote( */ STATIC int xfs_iget_core( - vnode_t *vp, + bhv_vnode_t *vp, xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, @@ -198,7 +196,7 @@ xfs_iget_core( xfs_ihash_t *ih; xfs_inode_t *ip; xfs_inode_t *iq; - vnode_t *inode_vp; + bhv_vnode_t *inode_vp; ulong version; int error; /* REFERENCED */ @@ -468,7 +466,7 @@ finish_inode: * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); + bhv_vfs_init_vnode(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); return 0; } @@ -489,7 +487,7 @@ xfs_iget( xfs_daddr_t bno) { struct inode *inode; - vnode_t *vp = NULL; + bhv_vnode_t *vp = NULL; int error; XFS_STATS_INC(xs_ig_attempts); @@ -543,7 +541,7 @@ retry: void xfs_inode_lock_init( xfs_inode_t *ip, - vnode_t *vp) + bhv_vnode_t *vp) { mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", (long)vp->v_number); @@ -603,12 +601,10 @@ void xfs_iput(xfs_inode_t *ip, uint lock_flags) { - vnode_t *vp = XFS_ITOV(ip); + bhv_vnode_t *vp = XFS_ITOV(ip); vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address); - xfs_iunlock(ip, lock_flags); - VN_RELE(vp); } @@ -619,7 +615,7 @@ void xfs_iput_new(xfs_inode_t *ip, uint lock_flags) { - vnode_t *vp = XFS_ITOV(ip); + bhv_vnode_t *vp = XFS_ITOV(ip); struct inode *inode = vn_to_inode(vp); vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); @@ -645,7 +641,7 @@ xfs_iput_new(xfs_inode_t *ip, void xfs_ireclaim(xfs_inode_t *ip) { - vnode_t *vp; + bhv_vnode_t *vp; /* * Remove from old hash list and mount list. @@ -1033,6 +1029,6 @@ xfs_iflock_nowait(xfs_inode_t *ip) void xfs_ifunlock(xfs_inode_t *ip) { - ASSERT(valusema(&(ip->i_flock)) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); vsema(&(ip->i_flock)); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 94b60dd03801..5fa0adb7e173 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -26,14 +26,12 @@ #include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -256,13 +254,11 @@ xfs_itobp( xfs_daddr_t bno, uint imap_flags) { + xfs_imap_t imap; xfs_buf_t *bp; int error; - xfs_imap_t imap; -#ifdef __KERNEL__ int i; int ni; -#endif if (ip->i_blkno == (xfs_daddr_t)0) { /* @@ -319,7 +315,6 @@ xfs_itobp( */ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, (int)imap.im_len, XFS_BUF_LOCK, &bp); - if (error) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " @@ -330,17 +325,21 @@ xfs_itobp( #endif /* DEBUG */ return error; } -#ifdef __KERNEL__ + /* * Validate the magic number and version of every inode in the buffer * (if DEBUG kernel) or the first inode in the buffer, otherwise. + * No validation is done here in userspace (xfs_repair). */ -#ifdef DEBUG +#if !defined(__KERNEL__) + ni = 0; +#elif defined(DEBUG) ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); -#else +#else /* usual case */ ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; #endif + for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; @@ -352,8 +351,11 @@ xfs_itobp( if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { #ifdef DEBUG - prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", - mp->m_ddev_targp, + if (!(imap_flags & XFS_IMAP_BULKSTAT)) + cmn_err(CE_ALERT, + "Device %s - bad inode magic/vsn " + "daddr %lld #%d (magic=%x)", + XFS_BUFTARG_NAME(mp->m_ddev_targp), (unsigned long long)imap.im_blkno, i, INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); #endif @@ -363,7 +365,6 @@ xfs_itobp( return XFS_ERROR(EFSCORRUPTED); } } -#endif /* __KERNEL__ */ xfs_inobp_check(mp, bp); @@ -782,7 +783,6 @@ xfs_xlate_dinode_core( STATIC uint _xfs_dic2xflags( - xfs_dinode_core_t *dic, __uint16_t di_flags) { uint flags = 0; @@ -812,6 +812,8 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_EXTSIZE; if (di_flags & XFS_DIFLAG_EXTSZINHERIT) flags |= XFS_XFLAG_EXTSZINHERIT; + if (di_flags & XFS_DIFLAG_NODEFRAG) + flags |= XFS_XFLAG_NODEFRAG; } return flags; @@ -823,16 +825,16 @@ xfs_ip2xflags( { xfs_dinode_core_t *dic = &ip->i_d; - return _xfs_dic2xflags(dic, dic->di_flags) | - (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(dic->di_flags) | + (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); } uint xfs_dic2xflags( xfs_dinode_core_t *dic) { - return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | - (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) | + (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); } /* @@ -1083,7 +1085,7 @@ xfs_ialloc( { xfs_ino_t ino; xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; uint flags; int error; @@ -1221,6 +1223,9 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NOSYMLINKS; if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && + xfs_inherit_nodefrag) + di_flags |= XFS_DIFLAG_NODEFRAG; ip->i_d.di_flags |= di_flags; } /* FALLTHROUGH */ @@ -1244,8 +1249,8 @@ xfs_ialloc( */ xfs_trans_log_inode(tp, ip, flags); - /* now that we have an i_mode we can set Linux inode ops (& unlock) */ - VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); + /* now that we have an i_mode we can setup inode ops and unlock */ + bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); *ipp = ip; return 0; @@ -1285,7 +1290,7 @@ xfs_isize_check( (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, - NULL)) + NULL, NULL)) return; ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); @@ -1421,7 +1426,7 @@ xfs_itruncate_start( xfs_fsize_t last_byte; xfs_off_t toss_start; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); @@ -1434,9 +1439,9 @@ xfs_itruncate_start( vn_iowait(vp); /* wait for the completion of any pending DIOs */ /* - * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers + * Call toss_pages or flushinval_pages to get rid of pages * overlapping the region being removed. We have to use - * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the + * the less efficient flushinval_pages in the case that the * caller may not be able to finish the truncate without * dropping the inode's I/O lock. Make sure * to catch any pages brought in by buffers overlapping @@ -1445,10 +1450,10 @@ xfs_itruncate_start( * so that we don't toss things on the same block as * new_size but before it. * - * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to + * Before calling toss_page or flushinval_pages, make sure to * call remapf() over the same region if the file is mapped. * This frees up mapped file references to the pages in the - * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures + * given range and for the flushinval_pages case it ensures * that we get the latest mapped changes flushed out. */ toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); @@ -1466,9 +1471,9 @@ xfs_itruncate_start( last_byte); if (last_byte > toss_start) { if (flags & XFS_ITRUNC_DEFINITE) { - VOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); + bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } else { - VOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); + bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } } @@ -1666,12 +1671,13 @@ xfs_itruncate_finish( * runs. */ XFS_BMAP_INIT(&free_list, &first_block); - error = xfs_bunmapi(ntp, ip, first_unmap_block, - unmap_len, + error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, + first_unmap_block, unmap_len, XFS_BMAPI_AFLAG(fork) | (sync ? 0 : XFS_BMAPI_ASYNC), XFS_ITRUNC_MAX_EXTENTS, - &first_block, &free_list, &done); + &first_block, &free_list, + NULL, &done); if (error) { /* * If the bunmapi call encounters an error, @@ -2745,13 +2751,14 @@ xfs_iunpin( * the inode to become unpinned. */ if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { - vnode_t *vp = XFS_ITOV_NULL(ip); + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); /* make sync come back and flush this inode */ if (vp) { struct inode *inode = vn_to_inode(vp); - if (!(inode->i_state & I_NEW)) + if (!(inode->i_state & + (I_NEW|I_FREEING|I_CLEAR))) mark_inode_dirty_sync(inode); } } @@ -2916,13 +2923,6 @@ xfs_iflush_fork( ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); } - if (whichfork == XFS_DATA_FORK) { - if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) { - XFS_ERROR_REPORT("xfs_iflush_fork", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } - } break; case XFS_DINODE_FMT_EXTENTS: @@ -3006,7 +3006,7 @@ xfs_iflush( XFS_STATS_INC(xs_iflush_count); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); - ASSERT(valusema(&ip->i_flock) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3199,7 +3199,7 @@ xfs_iflush( corrupt_out: xfs_buf_relse(bp); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_iflush_abort(ip); /* * Unlocks the flush lock @@ -3221,7 +3221,7 @@ cluster_corrupt_out: xfs_buf_relse(bp); } - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); if(!bufwasdelwri) { /* @@ -3264,7 +3264,7 @@ xfs_iflush_int( SPLDECL(s); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); - ASSERT(valusema(&ip->i_flock) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3504,7 +3504,7 @@ xfs_iflush_all( xfs_mount_t *mp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; again: XFS_MOUNT_ILOCK(mp); @@ -4180,7 +4180,7 @@ xfs_iext_direct_to_inline( */ memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, nextents * sizeof(xfs_bmbt_rec_t)); - kmem_free(ifp->if_u1.if_extents, KM_SLEEP); + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3b544db1790b..d10b76ed1e5b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -102,9 +102,9 @@ typedef struct xfs_ifork { #ifdef __KERNEL__ struct bhv_desc; +struct bhv_vnode; struct cred; struct ktrace; -struct vnode; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; @@ -400,7 +400,7 @@ void xfs_chash_init(struct xfs_mount *); void xfs_chash_free(struct xfs_mount *); xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, struct xfs_trans *); -void xfs_inode_lock_init(xfs_inode_t *, struct vnode *); +void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, uint, uint, xfs_inode_t **, xfs_daddr_t); void xfs_iput(xfs_inode_t *, uint); @@ -461,7 +461,7 @@ void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, int, uint); -xfs_inode_t *xfs_vtoi(struct vnode *vp); +xfs_inode_t *xfs_vtoi(struct bhv_vnode *vp); void xfs_synchronize_atime(xfs_inode_t *); @@ -509,7 +509,6 @@ extern struct kmem_zone *xfs_chashlist_zone; extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; -extern struct vnodeops xfs_vnodeops; #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7497a481b2f5..f8e80d8e7237 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -25,7 +25,6 @@ #include "xfs_buf_item.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -33,7 +32,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -794,7 +792,7 @@ xfs_inode_item_pushbuf( * inode flush completed and the inode was taken off the AIL. * So, just get out. */ - if ((valusema(&(ip->i_flock)) > 0) || + if (!issemalocked(&(ip->i_flock)) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -816,7 +814,7 @@ xfs_inode_item_pushbuf( * If not, we can flush it async. */ dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && - (valusema(&(ip->i_flock)) <= 0)); + issemalocked(&(ip->i_flock))); iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_buftrace("INODE ITEM PUSH", bp); @@ -864,7 +862,7 @@ xfs_inode_item_push( ip = iip->ili_inode; ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); - ASSERT(valusema(&(ip->i_flock)) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); /* * Since we were able to lock the inode's flush lock and * we found it on the AIL, the inode must be dirty. This @@ -1084,3 +1082,52 @@ xfs_istale_done( { xfs_iflush_abort(iip->ili_inode); } + +/* + * convert an xfs_inode_log_format struct from either 32 or 64 bit versions + * (which can have different field alignments) to the native version + */ +int +xfs_inode_item_format_convert( + xfs_log_iovec_t *buf, + xfs_inode_log_format_t *in_f) +{ + if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { + xfs_inode_log_format_32_t *in_f32; + + in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr; + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; + in_f->ilf_fields = in_f32->ilf_fields; + in_f->ilf_asize = in_f32->ilf_asize; + in_f->ilf_dsize = in_f32->ilf_dsize; + in_f->ilf_ino = in_f32->ilf_ino; + /* copy biggest field of ilf_u */ + memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, + in_f32->ilf_u.ilfu_uuid.__u_bits, + sizeof(uuid_t)); + in_f->ilf_blkno = in_f32->ilf_blkno; + in_f->ilf_len = in_f32->ilf_len; + in_f->ilf_boffset = in_f32->ilf_boffset; + return 0; + } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ + xfs_inode_log_format_64_t *in_f64; + + in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr; + in_f->ilf_type = in_f64->ilf_type; + in_f->ilf_size = in_f64->ilf_size; + in_f->ilf_fields = in_f64->ilf_fields; + in_f->ilf_asize = in_f64->ilf_asize; + in_f->ilf_dsize = in_f64->ilf_dsize; + in_f->ilf_ino = in_f64->ilf_ino; + /* copy biggest field of ilf_u */ + memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, + in_f64->ilf_u.ilfu_uuid.__u_bits, + sizeof(uuid_t)); + in_f->ilf_blkno = in_f64->ilf_blkno; + in_f->ilf_len = in_f64->ilf_len; + in_f->ilf_boffset = in_f64->ilf_boffset; + return 0; + } + return EFSCORRUPTED; +} diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index c5dbf93b6661..5db6cd1b4cf3 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -23,25 +23,6 @@ * log. The size of the inline data/extents/b-tree root to be logged * (if any) is indicated in the ilf_dsize field. Changes to this structure * must be added on to the end. - * - * Convention for naming inode log item versions : The current version - * is always named XFS_LI_INODE. When an inode log item gets superseded, - * add the latest version of IRIX that will generate logs with that item - * to the version name. - * - * -Version 1 of this structure (XFS_LI_5_3_INODE) included up to the first - * union (ilf_u) field. This was released with IRIX 5.3-XFS. - * -Version 2 of this structure (XFS_LI_6_1_INODE) is currently the entire - * structure. This was released with IRIX 6.0.1-XFS and IRIX 6.1. - * -Version 3 of this structure (XFS_LI_INODE) is the same as version 2 - * so a new structure definition wasn't necessary. However, we had - * to add a new type because the inode cluster size changed from 4K - * to 8K and the version number had to be rev'ved to keep older kernels - * from trying to recover logs with the 8K buffers in them. The logging - * code can handle recovery on different-sized clusters now so hopefully - * this'll be the last time we need to change the inode log item just - * for a change in the inode cluster size. This new version was - * released with IRIX 6.2. */ typedef struct xfs_inode_log_format { unsigned short ilf_type; /* inode log item type */ @@ -59,18 +40,38 @@ typedef struct xfs_inode_log_format { int ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; -/* Initial version shipped with IRIX 5.3-XFS */ -typedef struct xfs_inode_log_format_v1 { - unsigned short ilf_type; /* inode log item type */ - unsigned short ilf_size; /* size of this item */ - uint ilf_fields; /* flags for fields logged */ - uint ilf_dsize; /* size of data/ext/root */ - xfs_ino_t ilf_ino; /* inode number */ +typedef struct xfs_inode_log_format_32 { + unsigned short ilf_type; /* 16: inode log item type */ + unsigned short ilf_size; /* 16: size of this item */ + uint ilf_fields; /* 32: flags for fields logged */ + ushort ilf_asize; /* 32: size of attr d/ext/root */ + ushort ilf_dsize; /* 32: size of data/ext/root */ + xfs_ino_t ilf_ino; /* 64: inode number */ union { - xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ + xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/ + uuid_t ilfu_uuid; /* 128: mount point value */ + } ilf_u; + __int64_t ilf_blkno; /* 64: blkno of inode buffer */ + int ilf_len; /* 32: len of inode buffer */ + int ilf_boffset; /* 32: off of inode in buffer */ +} __attribute__((packed)) xfs_inode_log_format_32_t; + +typedef struct xfs_inode_log_format_64 { + unsigned short ilf_type; /* 16: inode log item type */ + unsigned short ilf_size; /* 16: size of this item */ + uint ilf_fields; /* 32: flags for fields logged */ + ushort ilf_asize; /* 32: size of attr d/ext/root */ + ushort ilf_dsize; /* 32: size of data/ext/root */ + __uint32_t ilf_pad; /* 32: pad for 64 bit boundary */ + xfs_ino_t ilf_ino; /* 64: inode number */ + union { + xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/ + uuid_t ilfu_uuid; /* 128: mount point value */ } ilf_u; -} xfs_inode_log_format_t_v1; + __int64_t ilf_blkno; /* 64: blkno of inode buffer */ + int ilf_len; /* 32: len of inode buffer */ + int ilf_boffset; /* 32: off of inode in buffer */ +} xfs_inode_log_format_64_t; /* * Flags for xfs_trans_log_inode flags field. @@ -172,6 +173,8 @@ extern void xfs_inode_item_destroy(struct xfs_inode *); extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *); extern void xfs_iflush_abort(struct xfs_inode *); +extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, + xfs_inode_log_format_t *); #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c index a07815661a8c..06d710c9ce4b 100644 --- a/fs/xfs/xfs_iocore.c +++ b/fs/xfs/xfs_iocore.c @@ -24,14 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" +#include "xfs_dfrag.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -58,7 +57,7 @@ xfs_size_fn( STATIC int xfs_ioinit( - struct vfs *vfsp, + struct bhv_vfs *vfsp, struct xfs_mount_args *mntargs, int flags) { @@ -68,6 +67,7 @@ xfs_ioinit( xfs_ioops_t xfs_iocore_xfs = { .xfs_ioinit = (xfs_ioinit_t) xfs_ioinit, .xfs_bmapi_func = (xfs_bmapi_t) xfs_bmapi, + .xfs_bunmapi_func = (xfs_bunmapi_t) xfs_bunmapi, .xfs_bmap_eof_func = (xfs_bmap_eof_t) xfs_bmap_eof, .xfs_iomap_write_direct = (xfs_iomap_write_direct_t) xfs_iomap_write_direct, @@ -84,6 +84,7 @@ xfs_ioops_t xfs_iocore_xfs = { .xfs_unlock = (xfs_unlk_t) xfs_iunlock, .xfs_size_func = (xfs_size_t) xfs_size_fn, .xfs_iodone = (xfs_iodone_t) fs_noerr, + .xfs_swap_extents_func = (xfs_swap_extents_t) xfs_swap_extents, }; void diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d5dfedcb8922..f1949c16df15 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_alloc.h" #include "xfs_dmapi.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -252,7 +250,7 @@ xfs_iomap( error = XFS_BMAPI(mp, NULL, io, offset_fsb, (xfs_filblks_t)(end_fsb - offset_fsb), bmapi_flags, NULL, 0, &imap, - &nimaps, NULL); + &nimaps, NULL, NULL); if (error) goto out; @@ -519,8 +517,8 @@ xfs_iomap_write_direct( */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, - bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list); + error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag, + &firstfsb, 0, &imap, &nimaps, &free_list, NULL); if (error) goto error0; @@ -610,8 +608,8 @@ xfs_iomap_eof_want_preallocate( while (count_fsb > 0) { imaps = nimaps; firstblock = NULLFSBLOCK; - error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, - 0, &firstblock, 0, imap, &imaps, NULL); + error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0, + &firstblock, 0, imap, &imaps, NULL, NULL); if (error) return error; for (n = 0; n < imaps; n++) { @@ -695,11 +693,11 @@ retry: nimaps = XFS_WRITE_IMAPS; firstblock = NULLFSBLOCK; - error = xfs_bmapi(NULL, ip, offset_fsb, + error = XFS_BMAPI(mp, NULL, io, offset_fsb, (xfs_filblks_t)(last_fsb - offset_fsb), XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, - &nimaps, NULL); + &nimaps, NULL, NULL); if (error && (error != ENOSPC)) return XFS_ERROR(error); @@ -832,9 +830,9 @@ xfs_iomap_write_allocate( } /* Go get the actual blocks */ - error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, + error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, - imap, &nimaps, &free_list); + imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; @@ -955,9 +953,9 @@ xfs_iomap_write_unwritten( */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, + error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, - 1, &imap, &nimaps, &free_list); + 1, &imap, &nimaps, &free_list, NULL); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 94068d014f27..46249e4d1fea 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -41,11 +39,6 @@ #include "xfs_error.h" #include "xfs_btree.h" -#ifndef HAVE_USERACC -#define useracc(ubuffer, size, flags, foo) (0) -#define unuseracc(ubuffer, size, flags) -#endif - STATIC int xfs_bulkstat_one_iget( xfs_mount_t *mp, /* mount point for filesystem */ @@ -56,7 +49,7 @@ xfs_bulkstat_one_iget( { xfs_dinode_core_t *dic; /* dinode core info pointer */ xfs_inode_t *ip; /* incore inode pointer */ - vnode_t *vp; + bhv_vnode_t *vp; int error; error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); @@ -336,15 +329,6 @@ xfs_bulkstat( nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; /* - * Lock down the user's buffer. If a buffer was not sent, as in the case - * disk quota code calls here, we skip this. - */ - if (ubuffer && - (error = useracc(ubuffer, ubcount * statstruct_size, - (B_READ|B_PHYS), NULL))) { - return error; - } - /* * Allocate a page-sized buffer for inode btree records. * We could try allocating something smaller, but for normal * calls we'll always (potentially) need the whole page. @@ -650,8 +634,6 @@ xfs_bulkstat( * Done, we're either out of filesystem or space to put the data. */ kmem_free(irbuf, NBPC); - if (ubuffer) - unuseracc(ubuffer, ubcount * statstruct_size, (B_READ|B_PHYS)); *ubcountp = ubelem; if (agno >= mp->m_sb.sb_agcount) { /* diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 11eb4e1b18c4..be5f12e07d22 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -45,7 +45,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, */ #define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ #define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ -#define BULKSTAT_FG_VFSLOCKED 0x4 /* Already have vfs lock */ /* * Return stat information in bulk (by-inode) for the filesystem. diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 32e841d2f26d..d8f5d4cbe8b7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -36,7 +35,6 @@ #include "xfs_ialloc_btree.h" #include "xfs_log_recover.h" #include "xfs_trans_priv.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -402,7 +400,7 @@ xfs_log_release_iclog(xfs_mount_t *mp, xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; if (xlog_state_release_iclog(log, iclog)) { - xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); return EIO; } @@ -498,9 +496,8 @@ xfs_log_mount(xfs_mount_t *mp, * just worked. */ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - int error; - vfs_t *vfsp = XFS_MTOVFS(mp); - int readonly = (vfsp->vfs_flag & VFS_RDONLY); + bhv_vfs_t *vfsp = XFS_MTOVFS(mp); + int error, readonly = (vfsp->vfs_flag & VFS_RDONLY); if (readonly) vfsp->vfs_flag &= ~VFS_RDONLY; @@ -726,7 +723,7 @@ xfs_log_write(xfs_mount_t * mp, return XFS_ERROR(EIO); if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { - xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); } return error; } /* xfs_log_write */ @@ -816,9 +813,9 @@ xfs_log_need_covered(xfs_mount_t *mp) SPLDECL(s); int needed = 0, gen; xlog_t *log = mp->m_log; - vfs_t *vfsp = XFS_MTOVFS(mp); + bhv_vfs_t *vfsp = XFS_MTOVFS(mp); - if (fs_frozen(vfsp) || XFS_FORCED_SHUTDOWN(mp) || + if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) || (vfsp->vfs_flag & VFS_RDONLY)) return 0; @@ -956,7 +953,7 @@ xlog_iodone(xfs_buf_t *bp) XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); XFS_BUF_STALE(bp); - xfs_force_shutdown(l->l_mp, XFS_LOG_IO_ERROR); + xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); /* * This flag will be propagated to the trans-committed * callback routines to let them know that the log-commit @@ -1261,7 +1258,7 @@ xlog_commit_record(xfs_mount_t *mp, ASSERT_ALWAYS(iclog); if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, iclog, XLOG_COMMIT_TRANS))) { - xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); } return error; } /* xlog_commit_record */ @@ -1790,7 +1787,7 @@ xlog_write(xfs_mount_t * mp, xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, "xfs_log_write: reservation ran out. Need to up reservation"); /* If we did not panic, shutdown the filesystem */ - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); #endif } else ticket->t_curr_res -= len; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1f0016b0b4ec..55b4237c2153 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -193,14 +191,14 @@ xlog_header_check_dump( { int b; - printk("%s: SB : uuid = ", __FUNCTION__); + cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__); for (b = 0; b < 16; b++) - printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]); - printk(", fmt = %d\n", XLOG_FMT); - printk(" log : uuid = "); + cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); + cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); + cmn_err(CE_DEBUG, " log : uuid = "); for (b = 0; b < 16; b++) - printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]); - printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); + cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); + cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); } #else #define xlog_header_check_dump(mp, head) @@ -282,7 +280,7 @@ xlog_recover_iodone( mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); xfs_ioerror_alert("xlog_recover_iodone", mp, bp, XFS_BUF_ADDR(bp)); - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); @@ -1889,7 +1887,7 @@ xlog_recover_do_inode_buffer( buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, next_unlinked_offset); - INT_SET(*buffer_nextp, ARCH_CONVERT, *logged_nextp); + *buffer_nextp = *logged_nextp; } return 0; @@ -2292,12 +2290,22 @@ xlog_recover_do_inode_trans( int attr_index; uint fields; xfs_dinode_core_t *dicp; + int need_free = 0; if (pass == XLOG_RECOVER_PASS1) { return 0; } - in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { + in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; + } else { + in_f = (xfs_inode_log_format_t *)kmem_alloc( + sizeof(xfs_inode_log_format_t), KM_SLEEP); + need_free = 1; + error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); + if (error) + goto error; + } ino = in_f->ilf_ino; mp = log->l_mp; if (ITEM_TYPE(item) == XFS_LI_INODE) { @@ -2323,8 +2331,10 @@ xlog_recover_do_inode_trans( * Inode buffers can be freed, look out for it, * and do not replay the inode. */ - if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) - return 0; + if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { + error = 0; + goto error; + } bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, XFS_BUF_LOCK); @@ -2333,7 +2343,7 @@ xlog_recover_do_inode_trans( bp, imap.im_blkno); error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); - return error; + goto error; } error = 0; ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); @@ -2350,7 +2360,8 @@ xlog_recover_do_inode_trans( dip, bp, ino); XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr); if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { @@ -2360,7 +2371,8 @@ xlog_recover_do_inode_trans( item, ino); XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } /* Skip replay when the on disk inode is newer than the log one */ @@ -2376,7 +2388,8 @@ xlog_recover_do_inode_trans( /* do nothing */ } else { xfs_buf_relse(bp); - return 0; + error = 0; + goto error; } } /* Take the opportunity to reset the flush iteration count */ @@ -2391,7 +2404,8 @@ xlog_recover_do_inode_trans( xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", item, dip, bp, ino); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && @@ -2403,7 +2417,8 @@ xlog_recover_do_inode_trans( xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", item, dip, bp, ino); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } } if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ @@ -2415,7 +2430,8 @@ xlog_recover_do_inode_trans( item, dip, bp, ino, dicp->di_nextents + dicp->di_anextents, dicp->di_nblocks); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", @@ -2424,7 +2440,8 @@ xlog_recover_do_inode_trans( xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", item, dip, bp, ino, dicp->di_forkoff); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", @@ -2433,7 +2450,8 @@ xlog_recover_do_inode_trans( xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", item->ri_buf[1].i_len, item); - return XFS_ERROR(EFSCORRUPTED); + error = EFSCORRUPTED; + goto error; } /* The core is in in-core format */ @@ -2521,7 +2539,8 @@ xlog_recover_do_inode_trans( xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); ASSERT(0); xfs_buf_relse(bp); - return XFS_ERROR(EIO); + error = EIO; + goto error; } } @@ -2537,7 +2556,10 @@ write_inode_buffer: error = xfs_bwrite(mp, bp); } - return (error); +error: + if (need_free) + kmem_free(in_f, sizeof(*in_f)); + return XFS_ERROR(error); } /* @@ -2674,32 +2696,32 @@ xlog_recover_do_dquot_trans( * structure into it, and adds the efi to the AIL with the given * LSN. */ -STATIC void +STATIC int xlog_recover_do_efi_trans( xlog_t *log, xlog_recover_item_t *item, xfs_lsn_t lsn, int pass) { + int error; xfs_mount_t *mp; xfs_efi_log_item_t *efip; xfs_efi_log_format_t *efi_formatp; SPLDECL(s); if (pass == XLOG_RECOVER_PASS1) { - return; + return 0; } efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == - (sizeof(xfs_efi_log_format_t) + - ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t)))); mp = log->l_mp; efip = xfs_efi_init(mp, efi_formatp->efi_nextents); - memcpy((char *)&(efip->efi_format), (char *)efi_formatp, - sizeof(xfs_efi_log_format_t) + - ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t))); + if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), + &(efip->efi_format)))) { + xfs_efi_item_free(efip); + return error; + } efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; @@ -2708,6 +2730,7 @@ xlog_recover_do_efi_trans( * xfs_trans_update_ail() drops the AIL lock. */ xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s); + return 0; } @@ -2738,9 +2761,10 @@ xlog_recover_do_efd_trans( } efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == - (sizeof(xfs_efd_log_format_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_t)))); + ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + + ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || + (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + + ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); efi_id = efd_formatp->efd_efi_id; /* @@ -2810,15 +2834,14 @@ xlog_recover_do_trans( if ((error = xlog_recover_do_buffer_trans(log, item, pass))) break; - } else if ((ITEM_TYPE(item) == XFS_LI_INODE) || - (ITEM_TYPE(item) == XFS_LI_6_1_INODE) || - (ITEM_TYPE(item) == XFS_LI_5_3_INODE)) { + } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { if ((error = xlog_recover_do_inode_trans(log, item, pass))) break; } else if (ITEM_TYPE(item) == XFS_LI_EFI) { - xlog_recover_do_efi_trans(log, item, trans->r_lsn, - pass); + if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, + pass))) + break; } else if (ITEM_TYPE(item) == XFS_LI_EFD) { xlog_recover_do_efd_trans(log, item, pass); } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { @@ -3419,13 +3442,13 @@ xlog_unpack_data_checksum( if (rhead->h_chksum || ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { cmn_err(CE_DEBUG, - "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)", + "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum); cmn_err(CE_DEBUG, "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { cmn_err(CE_DEBUG, - "XFS: LogR this is a LogV2 filesystem"); + "XFS: LogR this is a LogV2 filesystem\n"); } log->l_flags |= XLOG_CHKSUM_MISMATCH; } @@ -3798,7 +3821,7 @@ xlog_do_log_recovery( error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS2); #ifdef DEBUG - { + if (!error) { int i; for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) @@ -3974,7 +3997,7 @@ xlog_recover_finish( log->l_flags &= ~XLOG_RECOVERY_NEEDED; } else { cmn_err(CE_DEBUG, - "!Ending clean XFS mount for filesystem: %s", + "!Ending clean XFS mount for filesystem: %s\n", log->l_mp->m_fsname); } return 0; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c0b1c2906880..10dbf203c62f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -196,7 +194,7 @@ xfs_mount_free( kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); if (remove_bhv) { - struct vfs *vfsp = XFS_MTOVFS(mp); + struct bhv_vfs *vfsp = XFS_MTOVFS(mp); bhv_remove_all_vfsops(vfsp, 0); VFS_REMOVEBHV(vfsp, &mp->m_bhv); @@ -337,7 +335,7 @@ xfs_mount_validate_sb( xfs_agnumber_t xfs_initialize_perag( - struct vfs *vfs, + bhv_vfs_t *vfs, xfs_mount_t *mp, xfs_agnumber_t agcount) { @@ -651,14 +649,14 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) */ int xfs_mountfs( - vfs_t *vfsp, + bhv_vfs_t *vfsp, xfs_mount_t *mp, int mfsi_flags) { xfs_buf_t *bp; xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; - vnode_t *rvp = NULL; + bhv_vnode_t *rvp = NULL; int readio_log, writeio_log; xfs_daddr_t d; __uint64_t ret64; @@ -934,18 +932,7 @@ xfs_mountfs( vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid; mp->m_dmevmask = 0; /* not persistent; set after each mount */ - /* - * Select the right directory manager. - */ - mp->m_dirops = - XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ? - xfsv2_dirops : - xfsv1_dirops; - - /* - * Initialize directory manager's entries. - */ - XFS_DIR_MOUNT(mp); + xfs_dir_mount(mp); /* * Initialize the attribute manager's entries. @@ -1006,8 +993,9 @@ xfs_mountfs( if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { cmn_err(CE_WARN, "XFS: corrupted root inode"); - prdev("Root inode %llu is not a directory", - mp->m_ddev_targp, (unsigned long long)rip->i_ino); + cmn_err(CE_WARN, "Device %s - root %llu is not a directory", + XFS_BUFTARG_NAME(mp->m_ddev_targp), + (unsigned long long)rip->i_ino); xfs_iunlock(rip, XFS_ILOCK_EXCL); XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, mp); @@ -1094,7 +1082,7 @@ xfs_mountfs( int xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) { - struct vfs *vfsp = XFS_MTOVFS(mp); + struct bhv_vfs *vfsp = XFS_MTOVFS(mp); #if defined(DEBUG) || defined(INDUCE_IO_ERROR) int64_t fsid; #endif @@ -1254,6 +1242,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_trans_log_buf(tp, bp, first, last); } + +/* + * In order to avoid ENOSPC-related deadlock caused by + * out-of-order locking of AGF buffer (PV 947395), we place + * constraints on the relationship among actual allocations for + * data blocks, freelist blocks, and potential file data bmap + * btree blocks. However, these restrictions may result in no + * actual space allocated for a delayed extent, for example, a data + * block in a certain AG is allocated but there is no additional + * block for the additional bmap btree block due to a split of the + * bmap btree of the file. The result of this may lead to an + * infinite loop in xfssyncd when the file gets flushed to disk and + * all delayed extents need to be actually allocated. To get around + * this, we explicitly set aside a few blocks which will not be + * reserved in delayed allocation. Considering the minimum number of + * needed freelist blocks is 4 fsbs, a potential split of file's bmap + * btree requires 1 fsb, so we set the number of set-aside blocks to 8. +*/ +#define SET_ASIDE_BLOCKS 8 + /* * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply * a delta to a specified field in the in-core superblock. Simply @@ -1298,7 +1306,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, return 0; case XFS_SBS_FDBLOCKS: - lcounter = (long long)mp->m_sb.sb_fdblocks; + lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); if (delta > 0) { /* Putting blocks back */ @@ -1332,7 +1340,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, } } - mp->m_sb.sb_fdblocks = lcounter; + mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; return 0; case XFS_SBS_FREXTENTS: lcounter = (long long)mp->m_sb.sb_frextents; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 668ad23fd37c..b2bd4be4200a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -53,8 +53,8 @@ typedef struct xfs_trans_reservations { #else struct cred; struct log; -struct vfs; -struct vnode; +struct bhv_vfs; +struct bhv_vnode; struct xfs_mount_args; struct xfs_ihash; struct xfs_chash; @@ -63,9 +63,11 @@ struct xfs_perag; struct xfs_iocore; struct xfs_bmbt_irec; struct xfs_bmap_free; +struct xfs_extdelta; +struct xfs_swapext; -extern struct vfsops xfs_vfsops; -extern struct vnodeops xfs_vnodeops; +extern struct bhv_vfsops xfs_vfsops; +extern struct bhv_vnodeops xfs_vnodeops; #define AIL_LOCK_T lock_t #define AIL_LOCKINIT(x,y) spinlock_init(x,y) @@ -78,15 +80,15 @@ extern struct vnodeops xfs_vnodeops; * Prototypes and functions for the Data Migration subsystem. */ -typedef int (*xfs_send_data_t)(int, struct vnode *, - xfs_off_t, size_t, int, vrwlock_t *); +typedef int (*xfs_send_data_t)(int, struct bhv_vnode *, + xfs_off_t, size_t, int, bhv_vrwlock_t *); typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); -typedef int (*xfs_send_destroy_t)(struct vnode *, dm_right_t); -typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct vfs *, - struct vnode *, - dm_right_t, struct vnode *, dm_right_t, +typedef int (*xfs_send_destroy_t)(struct bhv_vnode *, dm_right_t); +typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct bhv_vfs *, + struct bhv_vnode *, + dm_right_t, struct bhv_vnode *, dm_right_t, char *, char *, mode_t, int, int); -typedef void (*xfs_send_unmount_t)(struct vfs *, struct vnode *, +typedef void (*xfs_send_unmount_t)(struct bhv_vfs *, struct bhv_vnode *, dm_right_t, mode_t, int, int); typedef struct xfs_dmops { @@ -188,13 +190,18 @@ typedef struct xfs_qmops { * Prototypes and functions for I/O core modularization. */ -typedef int (*xfs_ioinit_t)(struct vfs *, +typedef int (*xfs_ioinit_t)(struct bhv_vfs *, struct xfs_mount_args *, int); typedef int (*xfs_bmapi_t)(struct xfs_trans *, void *, xfs_fileoff_t, xfs_filblks_t, int, xfs_fsblock_t *, xfs_extlen_t, struct xfs_bmbt_irec *, int *, - struct xfs_bmap_free *); + struct xfs_bmap_free *, struct xfs_extdelta *); +typedef int (*xfs_bunmapi_t)(struct xfs_trans *, + void *, xfs_fileoff_t, + xfs_filblks_t, int, xfs_extnum_t, + xfs_fsblock_t *, struct xfs_bmap_free *, + struct xfs_extdelta *, int *); typedef int (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *); typedef int (*xfs_iomap_write_direct_t)( void *, xfs_off_t, size_t, int, @@ -213,11 +220,14 @@ typedef void (*xfs_lock_demote_t)(void *, uint); typedef int (*xfs_lock_nowait_t)(void *, uint); typedef void (*xfs_unlk_t)(void *, unsigned int); typedef xfs_fsize_t (*xfs_size_t)(void *); -typedef xfs_fsize_t (*xfs_iodone_t)(struct vfs *); +typedef xfs_fsize_t (*xfs_iodone_t)(struct bhv_vfs *); +typedef int (*xfs_swap_extents_t)(void *, void *, + struct xfs_swapext*); typedef struct xfs_ioops { xfs_ioinit_t xfs_ioinit; xfs_bmapi_t xfs_bmapi_func; + xfs_bunmapi_t xfs_bunmapi_func; xfs_bmap_eof_t xfs_bmap_eof_func; xfs_iomap_write_direct_t xfs_iomap_write_direct; xfs_iomap_write_delay_t xfs_iomap_write_delay; @@ -230,13 +240,17 @@ typedef struct xfs_ioops { xfs_unlk_t xfs_unlock; xfs_size_t xfs_size_func; xfs_iodone_t xfs_iodone; + xfs_swap_extents_t xfs_swap_extents_func; } xfs_ioops_t; #define XFS_IOINIT(vfsp, args, flags) \ (*(mp)->m_io_ops.xfs_ioinit)(vfsp, args, flags) -#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist) \ +#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist,delta) \ (*(mp)->m_io_ops.xfs_bmapi_func) \ - (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist) + (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist,delta) +#define XFS_BUNMAPI(mp, trans,io,bno,len,f,nexts,first,flist,delta,done) \ + (*(mp)->m_io_ops.xfs_bunmapi_func) \ + (trans,(io)->io_obj,bno,len,f,nexts,first,flist,delta,done) #define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \ (*(mp)->m_io_ops.xfs_bmap_eof_func) \ ((io)->io_obj, endoff, whichfork, eof) @@ -266,6 +280,9 @@ typedef struct xfs_ioops { (*(mp)->m_io_ops.xfs_size_func)((io)->io_obj) #define XFS_IODONE(vfsp) \ (*(mp)->m_io_ops.xfs_iodone)(vfsp) +#define XFS_SWAP_EXTENTS(mp, io, tio, sxp) \ + (*(mp)->m_io_ops.xfs_swap_extents_func) \ + ((io)->io_obj, (tio)->io_obj, sxp) #ifdef HAVE_PERCPU_SB @@ -386,8 +403,6 @@ typedef struct xfs_mount { __uint8_t m_inode_quiesce;/* call quiesce on new inodes. field governed by m_ilock */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ - __uint8_t m_dirversion; /* 1 or 2 */ - xfs_dirops_t m_dirops; /* table of dir funcs */ int m_dirblksize; /* directory block sz--bytes */ int m_dirblkfsbs; /* directory block sz--fsbs */ xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ @@ -494,16 +509,7 @@ xfs_preferred_iosize(xfs_mount_t *mp) #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) #define xfs_force_shutdown(m,f) \ - VFS_FORCE_SHUTDOWN((XFS_MTOVFS(m)), f, __FILE__, __LINE__) - -/* - * Flags sent to xfs_force_shutdown. - */ -#define XFS_METADATA_IO_ERROR 0x1 -#define XFS_LOG_IO_ERROR 0x2 -#define XFS_FORCE_UMOUNT 0x4 -#define XFS_CORRUPT_INCORE 0x8 /* Corrupt in-memory data structures */ -#define XFS_SHUTDOWN_REMOTE_REQ 0x10 /* Shutdown came from remote cell */ + bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__) /* * Flags for xfs_mountfs @@ -521,7 +527,7 @@ xfs_preferred_iosize(xfs_mount_t *mp) * Macros for getting from mount to vfs and back. */ #define XFS_MTOVFS(mp) xfs_mtovfs(mp) -static inline struct vfs *xfs_mtovfs(xfs_mount_t *mp) +static inline struct bhv_vfs *xfs_mtovfs(xfs_mount_t *mp) { return bhvtovfs(&mp->m_bhv); } @@ -533,7 +539,7 @@ static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp) } #define XFS_VFSTOM(vfs) xfs_vfstom(vfs) -static inline xfs_mount_t *xfs_vfstom(vfs_t *vfs) +static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs) { return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops)); } @@ -571,7 +577,7 @@ typedef struct xfs_mod_sb { extern xfs_mount_t *xfs_mount_init(void); extern void xfs_mod_sb(xfs_trans_t *, __int64_t); extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); -extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int); +extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int); extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); extern int xfs_unmountfs(xfs_mount_t *, struct cred *); @@ -589,7 +595,7 @@ extern void xfs_freesb(xfs_mount_t *); extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); extern int xfs_syncsub(xfs_mount_t *, int, int, int *); extern int xfs_sync_inodes(xfs_mount_t *, int, int, int *); -extern xfs_agnumber_t xfs_initialize_perag(struct vfs *, xfs_mount_t *, +extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *, xfs_agnumber_t); extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t); diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 1408a32eef88..320d63ff9ca2 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 7fbef974bce6..acb853b33ebb 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -365,7 +365,7 @@ typedef struct xfs_dqtrxops { extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); -extern struct bhv_vfsops xfs_qmops; +extern struct bhv_module_vfsops xfs_qmops; #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 1f148762eb28..d98171deaa1c 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -22,13 +22,11 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -40,7 +38,6 @@ #include "xfs_refcache.h" #include "xfs_utils.h" #include "xfs_trans_space.h" -#include "xfs_dir_leaf.h" /* @@ -87,8 +84,8 @@ STATIC int xfs_lock_for_rename( xfs_inode_t *dp1, /* old (source) directory inode */ xfs_inode_t *dp2, /* new (target) directory inode */ - vname_t *vname1,/* old entry name */ - vname_t *vname2,/* new entry name */ + bhv_vname_t *vname1,/* old entry name */ + bhv_vname_t *vname2,/* new entry name */ xfs_inode_t **ipp1, /* inode of old entry */ xfs_inode_t **ipp2, /* inode of new entry, if it already exists, NULL otherwise. */ @@ -225,9 +222,9 @@ xfs_lock_for_rename( int xfs_rename( bhv_desc_t *src_dir_bdp, - vname_t *src_vname, - vnode_t *target_dir_vp, - vname_t *target_vname, + bhv_vname_t *src_vname, + bhv_vnode_t *target_dir_vp, + bhv_vname_t *target_vname, cred_t *credp) { xfs_trans_t *tp; @@ -242,7 +239,7 @@ xfs_rename( int committed; xfs_inode_t *inodes[4]; int target_ip_dropped = 0; /* dropped target_ip link? */ - vnode_t *src_dir_vp; + bhv_vnode_t *src_dir_vp; int spaceres; int target_link_zero = 0; int num_inodes; @@ -398,34 +395,29 @@ xfs_rename( * fit before actually inserting it. */ if (spaceres == 0 && - (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name, - target_namelen))) { + (error = xfs_dir_canenter(tp, target_dp, target_name, + target_namelen))) goto error_return; - } /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." reference from the new entry. */ - error = XFS_DIR_CREATENAME(mp, tp, target_dp, target_name, + error = xfs_dir_createname(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); - if (error == ENOSPC) { + if (error == ENOSPC) goto error_return; - } - if (error) { + if (error) goto abort_return; - } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); - if (error) { + if (error) goto abort_return; - } } } else { /* target_ip != NULL */ - /* * If target exists and it's a directory, check that both * target and source are directories and that target can be @@ -435,7 +427,7 @@ xfs_rename( /* * Make sure target dir is empty. */ - if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) || + if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; @@ -451,12 +443,11 @@ xfs_rename( * In case there is already an entry with the same * name at the destination directory, remove it first. */ - error = XFS_DIR_REPLACE(mp, tp, target_dp, target_name, - target_namelen, src_ip->i_ino, &first_block, - &free_list, spaceres); - if (error) { + error = xfs_dir_replace(tp, target_dp, target_name, + target_namelen, src_ip->i_ino, + &first_block, &free_list, spaceres); + if (error) goto abort_return; - } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* @@ -464,9 +455,8 @@ xfs_rename( * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); - if (error) { + if (error) goto abort_return; - } target_ip_dropped = 1; if (src_is_directory) { @@ -474,9 +464,8 @@ xfs_rename( * Drop the link from the old "." entry. */ error = xfs_droplink(tp, target_ip); - if (error) { + if (error) goto abort_return; - } } /* Do this test while we still hold the locks */ @@ -488,18 +477,15 @@ xfs_rename( * Remove the source. */ if (new_parent && src_is_directory) { - /* * Rewrite the ".." entry to point to the new * directory. */ - error = XFS_DIR_REPLACE(mp, tp, src_ip, "..", 2, - target_dp->i_ino, &first_block, - &free_list, spaceres); + error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, + &first_block, &free_list, spaceres); ASSERT(error != EEXIST); - if (error) { + if (error) goto abort_return; - } xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } else { @@ -527,16 +513,14 @@ xfs_rename( * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); - if (error) { + if (error) goto abort_return; - } } - error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen, + error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); - if (error) { + if (error) goto abort_return; - } xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* @@ -609,7 +593,7 @@ xfs_rename( * Let interposed file systems know about removed links. */ if (target_ip_dropped) { - VOP_LINK_REMOVED(XFS_ITOV(target_ip), target_dir_vp, + bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp, target_link_zero); IRELE(target_ip); } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 5b413946b1c5..0c1e42b037ef 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -141,7 +139,7 @@ xfs_growfs_rt_alloc( cancelflags |= XFS_TRANS_ABORT; error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, - resblks, &map, &nmap, &flist); + resblks, &map, &nmap, &flist, NULL); if (!error && nmap < 1) error = XFS_ERROR(ENOSPC); if (error) @@ -2404,10 +2402,10 @@ xfs_rtprint_range( { xfs_extlen_t i; /* block number in the extent */ - printk("%Ld: ", (long long)start); + cmn_err(CE_DEBUG, "%Ld: ", (long long)start); for (i = 0; i < len; i++) - printk("%d", xfs_rtcheck_bit(mp, tp, start + i, 1)); - printk("\n"); + cmn_err(CE_DEBUG, "%d", xfs_rtcheck_bit(mp, tp, start + i, 1)); + cmn_err(CE_DEBUG, "\n"); } /* @@ -2431,17 +2429,17 @@ xfs_rtprint_summary( (void)xfs_rtget_summary(mp, tp, l, i, &sumbp, &sb, &c); if (c) { if (!p) { - printk("%Ld-%Ld:", 1LL << l, + cmn_err(CE_DEBUG, "%Ld-%Ld:", 1LL << l, XFS_RTMIN((1LL << l) + ((1LL << l) - 1LL), mp->m_sb.sb_rextents)); p = 1; } - printk(" %Ld:%d", (long long)i, c); + cmn_err(CE_DEBUG, " %Ld:%d", (long long)i, c); } } if (p) - printk("\n"); + cmn_err(CE_DEBUG, "\n"); } if (sumbp) xfs_trans_brelse(tp, sumbp); diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index a59c102cf214..defb2febaaf5 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -92,6 +90,90 @@ xfs_write_clear_setuid( } /* + * Handle logging requirements of various synchronous types of write. + */ +int +xfs_write_sync_logforce( + xfs_mount_t *mp, + xfs_inode_t *ip) +{ + int error = 0; + + /* + * If we're treating this as O_DSYNC and we have not updated the + * size, force the log. + */ + if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && + !(ip->i_update_size)) { + xfs_inode_log_item_t *iip = ip->i_itemp; + + /* + * If an allocation transaction occurred + * without extending the size, then we have to force + * the log up the proper point to ensure that the + * allocation is permanent. We can't count on + * the fact that buffered writes lock out direct I/O + * writes - the direct I/O write could have extended + * the size nontransactionally, then finished before + * we started. xfs_write_file will think that the file + * didn't grow but the update isn't safe unless the + * size change is logged. + * + * Force the log if we've committed a transaction + * against the inode or if someone else has and + * the commit record hasn't gone to disk (e.g. + * the inode is pinned). This guarantees that + * all changes affecting the inode are permanent + * when we return. + */ + if (iip && iip->ili_last_lsn) { + xfs_log_force(mp, iip->ili_last_lsn, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } else if (xfs_ipincount(ip) > 0) { + xfs_log_force(mp, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } + + } else { + xfs_trans_t *tp; + + /* + * O_SYNC or O_DSYNC _with_ a size update are handled + * the same way. + * + * If the write was synchronous then we need to make + * sure that the inode modification time is permanent. + * We'll have updated the timestamp above, so here + * we use a synchronous transaction to log the inode. + * It's not fast, but it's necessary. + * + * If this a dsync write and the size got changed + * non-transactionally, then we need to ensure that + * the size change gets logged in a synchronous + * transaction. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); + if ((error = xfs_trans_reserve(tp, 0, + XFS_SWRITE_LOG_RES(mp), + 0, 0, 0))) { + /* Transaction reserve failed */ + xfs_trans_cancel(tp, 0); + } else { + /* Transaction reserve successful */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0, NULL); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + } + + return error; +} + +/* * Force a shutdown of the filesystem instantly while keeping * the filesystem consistent. We don't do an unmount here; just shutdown * the shop, make sure that absolutely nothing persistent happens to @@ -109,12 +191,12 @@ xfs_do_force_shutdown( xfs_mount_t *mp; mp = XFS_BHVTOM(bdp); - logerror = flags & XFS_LOG_IO_ERROR; + logerror = flags & SHUTDOWN_LOG_IO_ERROR; - if (!(flags & XFS_FORCE_UMOUNT)) { - cmn_err(CE_NOTE, - "xfs_force_shutdown(%s,0x%x) called from line %d of file %s. Return address = 0x%p", - mp->m_fsname,flags,lnnum,fname,__return_address); + if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { + cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " + "line %d of file %s. Return address = 0x%p", + mp->m_fsname, flags, lnnum, fname, __return_address); } /* * No need to duplicate efforts. @@ -125,33 +207,37 @@ xfs_do_force_shutdown( /* * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't * queue up anybody new on the log reservations, and wakes up - * everybody who's sleeping on log reservations and tells - * them the bad news. + * everybody who's sleeping on log reservations to tell them + * the bad news. */ if (xfs_log_force_umount(mp, logerror)) return; - if (flags & XFS_CORRUPT_INCORE) { + if (flags & SHUTDOWN_CORRUPT_INCORE) { xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, "Corruption of in-memory data detected. Shutting down filesystem: %s", mp->m_fsname); if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { xfs_stack_trace(); } - } else if (!(flags & XFS_FORCE_UMOUNT)) { + } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { if (logerror) { xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, - "Log I/O Error Detected. Shutting down filesystem: %s", + "Log I/O Error Detected. Shutting down filesystem: %s", + mp->m_fsname); + } else if (flags & SHUTDOWN_DEVICE_REQ) { + xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, + "All device paths lost. Shutting down filesystem: %s", mp->m_fsname); - } else if (!(flags & XFS_SHUTDOWN_REMOTE_REQ)) { + } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, - "I/O Error Detected. Shutting down filesystem: %s", + "I/O Error Detected. Shutting down filesystem: %s", mp->m_fsname); } } - if (!(flags & XFS_FORCE_UMOUNT)) { - cmn_err(CE_ALERT, - "Please umount the filesystem, and rectify the problem(s)"); + if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { + cmn_err(CE_ALERT, "Please umount the filesystem, " + "and rectify the problem(s)"); } } @@ -335,7 +421,7 @@ xfs_bwrite( * from bwrite and we could be tracing a buffer that has * been reused. */ - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } return (error); } diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index e63795644478..188b296ff50c 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -75,6 +75,7 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb) * Prototypes for functions in xfs_rw.c. */ extern int xfs_write_clear_setuid(struct xfs_inode *ip); +extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip); extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bioerror(struct xfs_buf *bp); extern int xfs_bioerror_relse(struct xfs_buf *bp); @@ -87,9 +88,10 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, /* * Prototypes for functions in xfs_vnodeops.c. */ -extern int xfs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock); -extern void xfs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock); -extern int xfs_setattr(bhv_desc_t *bdp, vattr_t *vap, int flags, cred_t *credp); +extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); +extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); +extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags, + cred_t *credp); extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf, xfs_off_t offset, cred_t *credp, int flags); extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8d056cef5d1f..ee2721e0de4d 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -33,7 +32,6 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -236,11 +234,8 @@ xfs_trans_alloc( xfs_mount_t *mp, uint type) { - fs_check_frozen(XFS_MTOVFS(mp), SB_FREEZE_TRANS); - atomic_inc(&mp->m_active_trans); - - return (_xfs_trans_alloc(mp, type)); - + vfs_wait_for_freeze(XFS_MTOVFS(mp), SB_FREEZE_TRANS); + return _xfs_trans_alloc(mp, type); } xfs_trans_t * @@ -250,12 +245,9 @@ _xfs_trans_alloc( { xfs_trans_t *tp; - ASSERT(xfs_trans_zone != NULL); - tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); + atomic_inc(&mp->m_active_trans); - /* - * Initialize the transaction structure. - */ + tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); tp->t_magic = XFS_TRANS_MAGIC; tp->t_type = type; tp->t_mountp = mp; @@ -263,8 +255,7 @@ _xfs_trans_alloc( tp->t_busy_free = XFS_LBC_NUM_SLOTS; XFS_LIC_INIT(&(tp->t_items)); XFS_LBC_INIT(&(tp->t_busy)); - - return (tp); + return tp; } /* @@ -303,7 +294,7 @@ xfs_trans_dup( tp->t_blk_res = tp->t_blk_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - PFLAGS_DUP(&tp->t_pflags, &ntp->t_pflags); + ntp->t_pflags = tp->t_pflags; XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp); @@ -335,14 +326,11 @@ xfs_trans_reserve( uint logcount) { int log_flags; - int error; - int rsvd; - - error = 0; - rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + int error = 0; + int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; /* Mark this thread as being in a transaction */ - PFLAGS_SET_FSTRANS(&tp->t_pflags); + current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); /* * Attempt to reserve the needed disk blocks by decrementing @@ -353,7 +341,7 @@ xfs_trans_reserve( error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, -blocks, rsvd); if (error != 0) { - PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); return (XFS_ERROR(ENOSPC)); } tp->t_blk_res += blocks; @@ -426,9 +414,9 @@ undo_blocks: tp->t_blk_res = 0; } - PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - return (error); + return error; } @@ -819,7 +807,7 @@ shut_us_down: if (commit_lsn == -1 && !shutdown) shutdown = XFS_ERROR(EIO); } - PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0); xfs_trans_free_busy(tp); xfs_trans_free(tp); @@ -846,7 +834,7 @@ shut_us_down: */ nvec = xfs_trans_count_vecs(tp); if (nvec == 0) { - xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); goto shut_us_down; } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { log_vector = log_vector_fast; @@ -884,7 +872,7 @@ shut_us_down: * had pinned, clean up, free trans structure, and return error. */ if (error || commit_lsn == -1) { - PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); return XFS_ERROR(EIO); } @@ -926,7 +914,7 @@ shut_us_down: /* * Mark this thread as no longer being in a transaction */ - PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); /* * Once all the items of the transaction have been copied @@ -1148,7 +1136,7 @@ xfs_trans_cancel( */ if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } #ifdef DEBUG if (!(flags & XFS_TRANS_ABORT)) { @@ -1182,7 +1170,7 @@ xfs_trans_cancel( } /* mark this thread as no longer being in a transaction */ - PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free_items(tp, flags); xfs_trans_free_busy(tp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 100d9a4b38ee..cb65c3a603f5 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -805,12 +805,9 @@ typedef struct xfs_trans { ((mp)->m_sb.sb_inodesize + \ (mp)->m_sb.sb_sectsize * 2 + \ (mp)->m_dirblksize + \ - (XFS_DIR_IS_V1(mp) ? 0 : \ - XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1))) + \ + XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \ XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (4 + \ - (XFS_DIR_IS_V1(mp) ? 0 : \ - XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \ + (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \ XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) #define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 19ab24af1c1c..558c87ff0c41 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -22,7 +22,6 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" @@ -363,9 +362,10 @@ xfs_trans_delete_ail( AIL_UNLOCK(mp, s); else { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, - "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); + "%s: attempting to delete a log item that is not in the AIL", + __FUNCTION__); AIL_UNLOCK(mp, s); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } } } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c74c31ebc81c..60b6b898022b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -320,7 +318,7 @@ xfs_trans_read_buf( if (xfs_error_target == target) { if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_buf_relse(bp); - printk("Returning error!\n"); + cmn_err(CE_DEBUG, "Returning error!\n"); return XFS_ERROR(EIO); } } @@ -369,7 +367,7 @@ xfs_trans_read_buf( */ if (tp->t_flags & XFS_TRANS_DIRTY) xfs_force_shutdown(tp->t_mountp, - XFS_METADATA_IO_ERROR); + SHUTDOWN_META_IO_ERROR); return error; } } @@ -414,7 +412,7 @@ xfs_trans_read_buf( xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); if (tp->t_flags & XFS_TRANS_DIRTY) - xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); return error; } @@ -423,9 +421,9 @@ xfs_trans_read_buf( if (xfs_error_target == target) { if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_force_shutdown(tp->t_mountp, - XFS_METADATA_IO_ERROR); + SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); - printk("Returning error in trans!\n"); + cmn_err(CE_DEBUG, "Returning trans error!\n"); return XFS_ERROR(EIO); } } diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 7d7d627f25df..b290270dd4a6 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -22,7 +22,6 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_dir.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 7c5894d59f81..b8db1d5cde5a 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -24,14 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index 1117d600d741..2912aac07c7b 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -493,7 +493,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx) break; } else { /* out-of-order vacancy */ - printk("OOO vacancy lbcp 0x%p\n", lbcp); + cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp); ASSERT(0); } } diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index 7fe3792b18df..4ea2e5074bdd 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h @@ -30,8 +30,7 @@ XFS_EXTENTADD_SPACE_RES(mp,w)) #define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1) #define XFS_DAENTER_DBS(mp,w) \ - (XFS_DA_NODE_MAXDEPTH + \ - ((XFS_DIR_IS_V2(mp) && (w) == XFS_DATA_FORK) ? 2 : 0)) + (XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0)) #define XFS_DAENTER_BLOCKS(mp,w) \ (XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w)) #define XFS_DAENTER_BMAP1B(mp,w) \ @@ -41,10 +40,7 @@ #define XFS_DAENTER_SPACE_RES(mp,w) \ (XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w)) #define XFS_DAREMOVE_SPACE_RES(mp,w) XFS_DAENTER_BMAPS(mp,w) -#define XFS_DIRENTER_MAX_SPLIT(mp,nl) \ - (((mp)->m_sb.sb_blocksize == 512 && \ - XFS_DIR_IS_V1(mp) && \ - (nl) >= XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN) ? 2 : 1) +#define XFS_DIRENTER_MAX_SPLIT(mp,nl) 1 #define XFS_DIRENTER_SPACE_RES(mp,nl) \ (XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \ XFS_DIRENTER_MAX_SPLIT(mp,nl)) @@ -57,8 +53,7 @@ * Space reservation values for various transactions. */ #define XFS_ADDAFORK_SPACE_RES(mp) \ - ((mp)->m_dirblkfsbs + \ - (XFS_DIR_IS_V1(mp) ? 0 : XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))) + ((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)) #define XFS_ATTRRM_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK) /* This macro is not used - see inline code in xfs_attr_set */ diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 34654ec6ae10..9014d7e44488 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -24,12 +24,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -51,10 +49,10 @@ */ int xfs_get_dir_entry( - vname_t *dentry, + bhv_vname_t *dentry, xfs_inode_t **ipp) { - vnode_t *vp; + bhv_vnode_t *vp; vp = VNAME_TO_VNODE(dentry); @@ -69,11 +67,11 @@ int xfs_dir_lookup_int( bhv_desc_t *dir_bdp, uint lock_mode, - vname_t *dentry, + bhv_vname_t *dentry, xfs_ino_t *inum, xfs_inode_t **ipp) { - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; xfs_inode_t *dp; int error; @@ -82,8 +80,7 @@ xfs_dir_lookup_int( dp = XFS_BHVTOI(dir_bdp); - error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp, - VNAME(dentry), VNAMELEN(dentry), inum); + error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); if (!error) { /* * Unlock the directory. We do this because we can't diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index 472661a3b6d8..fe953e98afa7 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -23,9 +23,10 @@ #define ITRACE(ip) vn_trace_ref(XFS_ITOV(ip), __FILE__, __LINE__, \ (inst_t *)__return_address) -extern int xfs_rename (bhv_desc_t *, vname_t *, vnode_t *, vname_t *, cred_t *); -extern int xfs_get_dir_entry (vname_t *, xfs_inode_t **); -extern int xfs_dir_lookup_int (bhv_desc_t *, uint, vname_t *, xfs_ino_t *, +extern int xfs_rename (bhv_desc_t *, bhv_vname_t *, bhv_vnode_t *, + bhv_vname_t *, cred_t *); +extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); +extern int xfs_dir_lookup_int (bhv_desc_t *, uint, bhv_vname_t *, xfs_ino_t *, xfs_inode_t **); extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *); extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 36ea1b2094f2..6c96391f3f1a 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -32,7 +31,6 @@ #include "xfs_bmap_btree.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -131,9 +129,6 @@ xfs_init(void) #ifdef XFS_BMBT_TRACE xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP); #endif -#ifdef XFS_DIR_TRACE - xfs_dir_trace_buf = ktrace_alloc(XFS_DIR_TRACE_SIZE, KM_SLEEP); -#endif #ifdef XFS_ATTR_TRACE xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP); #endif @@ -177,9 +172,6 @@ xfs_cleanup(void) #ifdef XFS_ATTR_TRACE ktrace_free(xfs_attr_trace_buf); #endif -#ifdef XFS_DIR_TRACE - ktrace_free(xfs_dir_trace_buf); -#endif #ifdef XFS_BMBT_TRACE ktrace_free(xfs_bmbt_trace_buf); #endif @@ -212,7 +204,7 @@ xfs_cleanup(void) */ STATIC int xfs_start_flags( - struct vfs *vfs, + struct bhv_vfs *vfs, struct xfs_mount_args *ap, struct xfs_mount *mp) { @@ -337,7 +329,7 @@ xfs_start_flags( */ STATIC int xfs_finish_flags( - struct vfs *vfs, + struct bhv_vfs *vfs, struct xfs_mount_args *ap, struct xfs_mount *mp) { @@ -423,7 +415,7 @@ xfs_mount( struct xfs_mount_args *args, cred_t *credp) { - struct vfs *vfsp = bhvtovfs(bhvp); + struct bhv_vfs *vfsp = bhvtovfs(bhvp); struct bhv_desc *p; struct xfs_mount *mp = XFS_BHVTOM(bhvp); struct block_device *ddev, *logdev, *rtdev; @@ -552,10 +544,10 @@ xfs_unmount( int flags, cred_t *credp) { - struct vfs *vfsp = bhvtovfs(bdp); + bhv_vfs_t *vfsp = bhvtovfs(bdp); xfs_mount_t *mp = XFS_BHVTOM(bdp); xfs_inode_t *rip; - vnode_t *rvp; + bhv_vnode_t *rvp; int unmount_event_wanted = 0; int unmount_event_flags = 0; int xfs_unmountfs_needed = 0; @@ -665,9 +657,8 @@ xfs_mntupdate( int *flags, struct xfs_mount_args *args) { - struct vfs *vfsp = bhvtovfs(bdp); + bhv_vfs_t *vfsp = bhvtovfs(bdp); xfs_mount_t *mp = XFS_BHVTOM(bdp); - int error; if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */ if (vfsp->vfs_flag & VFS_RDONLY) @@ -679,7 +670,7 @@ xfs_mntupdate( mp->m_flags &= ~XFS_MOUNT_BARRIER; } } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ - VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); + bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL); xfs_quiesce_fs(mp); xfs_log_unmount_write(mp); xfs_unmountfs_writesb(mp); @@ -702,7 +693,7 @@ xfs_unmount_flush( xfs_inode_t *rip = mp->m_rootip; xfs_inode_t *rbmip; xfs_inode_t *rsumip = NULL; - vnode_t *rvp = XFS_ITOV(rip); + bhv_vnode_t *rvp = XFS_ITOV(rip); int error; xfs_ilock(rip, XFS_ILOCK_EXCL); @@ -781,9 +772,9 @@ fscorrupt_out2: STATIC int xfs_root( bhv_desc_t *bdp, - vnode_t **vpp) + bhv_vnode_t **vpp) { - vnode_t *vp; + bhv_vnode_t *vp; vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip); VN_HOLD(vp); @@ -801,8 +792,8 @@ xfs_root( STATIC int xfs_statvfs( bhv_desc_t *bdp, - xfs_statfs_t *statp, - vnode_t *vp) + bhv_statvfs_t *statp, + bhv_vnode_t *vp) { __uint64_t fakeinos; xfs_extlen_t lsize; @@ -900,7 +891,7 @@ xfs_sync( /* * xfs sync routine for internal use * - * This routine supports all of the flags defined for the generic VFS_SYNC + * This routine supports all of the flags defined for the generic vfs_sync * interface as explained above under xfs_sync. In the interests of not * changing interfaces within the 6.5 family, additional internally- * required functions are specified within a separate xflags parameter, @@ -917,7 +908,7 @@ xfs_sync_inodes( xfs_inode_t *ip = NULL; xfs_inode_t *ip_next; xfs_buf_t *bp; - vnode_t *vp = NULL; + bhv_vnode_t *vp = NULL; int error; int last_error; uint64_t fflag; @@ -1156,9 +1147,9 @@ xfs_sync_inodes( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (XFS_FORCED_SHUTDOWN(mp)) { - VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); + bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); } else { - VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF); + bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF); } xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -1178,8 +1169,8 @@ xfs_sync_inodes( * across calls to the buffer cache. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); - VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, - fflag, FI_NONE, error); + error = bhv_vop_flush_pages(vp, (xfs_off_t)0, + -1, fflag, FI_NONE); xfs_ilock(ip, XFS_ILOCK_SHARED); } @@ -1231,9 +1222,7 @@ xfs_sync_inodes( * marker and free it. */ XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - XFS_MOUNT_IUNLOCK(mp); ASSERT(!(lock_flags & @@ -1421,7 +1410,7 @@ xfs_sync_inodes( /* * xfs sync routine for internal use * - * This routine supports all of the flags defined for the generic VFS_SYNC + * This routine supports all of the flags defined for the generic vfs_sync * interface as explained above under xfs_sync. In the interests of not * changing interfaces within the 6.5 family, additional internally- * required functions are specified within a separate xflags parameter, @@ -1574,7 +1563,7 @@ xfs_syncsub( STATIC int xfs_vget( bhv_desc_t *bdp, - vnode_t **vpp, + bhv_vnode_t **vpp, fid_t *fidp) { xfs_mount_t *mp = XFS_BHVTOM(bdp); @@ -1657,10 +1646,10 @@ xfs_vget( #define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ STATIC unsigned long -suffix_strtoul(const char *cp, char **endp, unsigned int base) +suffix_strtoul(char *s, char **endp, unsigned int base) { int last, shift_left_factor = 0; - char *value = (char *)cp; + char *value = s; last = strlen(value) - 1; if (value[last] == 'K' || value[last] == 'k') { @@ -1676,7 +1665,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base) value[last] = '\0'; } - return simple_strtoul(cp, endp, base) << shift_left_factor; + return simple_strtoul((const char *)s, endp, base) << shift_left_factor; } STATIC int @@ -1686,7 +1675,7 @@ xfs_parseargs( struct xfs_mount_args *args, int update) { - struct vfs *vfsp = bhvtovfs(bhv); + bhv_vfs_t *vfsp = bhvtovfs(bhv); char *this_char, *value, *eov; int dsunit, dswidth, vol_dsunit, vol_dswidth; int iosize; @@ -1708,42 +1697,48 @@ xfs_parseargs( if (!strcmp(this_char, MNTOPT_LOGBUFS)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } args->logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } args->logbufsize = suffix_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } strncpy(args->logname, value, MAXNAMELEN); } else if (!strcmp(this_char, MNTOPT_MTPT)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } strncpy(args->mtpt, value, MAXNAMELEN); } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } strncpy(args->rtname, value, MAXNAMELEN); } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } @@ -1752,7 +1747,8 @@ xfs_parseargs( args->iosizelog = (uint8_t) iosize; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } @@ -1761,7 +1757,8 @@ xfs_parseargs( args->iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } @@ -1782,7 +1779,8 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_INO64)) { args->flags |= XFSMNT_INO64; #if !XFS_BIG_INUMS - printk("XFS: %s option not allowed on this system\n", + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", this_char); return EINVAL; #endif @@ -1792,14 +1790,16 @@ xfs_parseargs( args->flags |= XFSMNT_SWALLOC; } else if (!strcmp(this_char, MNTOPT_SUNIT)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } dsunit = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { - printk("XFS: %s option requires an argument\n", + cmn_err(CE_WARN, + "XFS: %s option requires an argument", this_char); return EINVAL; } @@ -1807,7 +1807,8 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { args->flags &= ~XFSMNT_32BITINODES; #if !XFS_BIG_INUMS - printk("XFS: %s option not allowed on this system\n", + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", this_char); return EINVAL; #endif @@ -1831,36 +1832,41 @@ xfs_parseargs( args->flags &= ~XFSMNT_ATTR2; } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now the default */ -printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); + cmn_err(CE_WARN, + "XFS: osyncisdsync is now the default, option is deprecated."); } else if (!strcmp(this_char, "irixsgid")) { -printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n"); + cmn_err(CE_WARN, + "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); } else { - printk("XFS: unknown mount option [%s].\n", this_char); + cmn_err(CE_WARN, + "XFS: unknown mount option [%s].", this_char); return EINVAL; } } if (args->flags & XFSMNT_NORECOVERY) { if ((vfsp->vfs_flag & VFS_RDONLY) == 0) { - printk("XFS: no-recovery mounts must be read-only.\n"); + cmn_err(CE_WARN, + "XFS: no-recovery mounts must be read-only."); return EINVAL; } } if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { - printk( - "XFS: sunit and swidth options incompatible with the noalign option\n"); + cmn_err(CE_WARN, + "XFS: sunit and swidth options incompatible with the noalign option"); return EINVAL; } if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - printk("XFS: sunit and swidth must be specified together\n"); + cmn_err(CE_WARN, + "XFS: sunit and swidth must be specified together"); return EINVAL; } if (dsunit && (dswidth % dsunit != 0)) { - printk( - "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)\n", + cmn_err(CE_WARN, + "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", dswidth, dsunit); return EINVAL; } @@ -1907,7 +1913,7 @@ xfs_showargs( }; struct proc_xfs_info *xfs_infop; struct xfs_mount *mp = XFS_BHVTOM(bhv); - struct vfs *vfsp = XFS_MTOVFS(mp); + struct bhv_vfs *vfsp = XFS_MTOVFS(mp); for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { if (mp->m_flags & xfs_infop->flag) @@ -1967,7 +1973,7 @@ xfs_freeze( } -vfsops_t xfs_vfsops = { +bhv_vfsops_t xfs_vfsops = { BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS), .vfs_parseargs = xfs_parseargs, .vfs_showargs = xfs_showargs, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7027ae68ee38..00a6b7dc24a0 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -16,8 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <linux/capability.h> - #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -27,7 +25,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -35,13 +32,11 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir_leaf.h" #include "xfs_itable.h" #include "xfs_btree.h" #include "xfs_ialloc.h" @@ -58,32 +53,14 @@ #include "xfs_log_priv.h" #include "xfs_mac.h" - -/* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 2 extents back from - * bmapi. - */ -#define SYMLINK_MAPS 2 - -/* - * For xfs, we check that the file isn't too big to be opened by this kernel. - * No other open action is required for regular files. Devices are handled - * through the specfs file system, pipes through fifofs. Device and - * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, - * when a new vnode is first looked up or created. - */ STATIC int xfs_open( bhv_desc_t *bdp, cred_t *credp) { int mode; - vnode_t *vp; - xfs_inode_t *ip; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); @@ -101,6 +78,35 @@ xfs_open( return 0; } +STATIC int +xfs_close( + bhv_desc_t *bdp, + int flags, + lastclose_t lastclose, + cred_t *credp) +{ + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return XFS_ERROR(EIO); + + if (lastclose != L_TRUE || !VN_ISREG(vp)) + return 0; + + /* + * If we previously truncated this file and removed old data in + * the process, we want to initiate "early" writeout on the last + * close. This is an attempt to combat the notorious NULL files + * problem which is particularly noticable from a truncate down, + * buffered (re-)write (delalloc), followed by a crash. What we + * are effectively doing here is significantly reducing the time + * window where we'd otherwise be exposed to that problem. + */ + if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); + return 0; +} /* * xfs_getattr @@ -108,13 +114,13 @@ xfs_open( STATIC int xfs_getattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { xfs_inode_t *ip; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -241,7 +247,7 @@ xfs_getattr( int xfs_setattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { @@ -255,7 +261,7 @@ xfs_setattr( uid_t uid=0, iuid=0; gid_t gid=0, igid=0; int timeflags = 0; - vnode_t *vp; + bhv_vnode_t *vp; xfs_prid_t projid=0, iprojid=0; int mandlock_before, mandlock_after; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; @@ -347,7 +353,6 @@ xfs_setattr( */ tp = NULL; lock_flags = XFS_ILOCK_EXCL; - ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1); if (flags & ATTR_NOLOCK) need_iolock = 0; if (!(mask & XFS_AT_SIZE)) { @@ -666,9 +671,17 @@ xfs_setattr( ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); - if (code) { + if (code) goto abort_return; - } + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + VTRUNCATE(vp); } /* * Have to do this even if the file's size doesn't change. @@ -800,6 +813,8 @@ xfs_setattr( di_flags |= XFS_DIFLAG_NODUMP; if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if (vap->va_xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if (vap->va_xflags & XFS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; @@ -869,7 +884,7 @@ xfs_setattr( */ mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); if (mandlock_before != mandlock_after) { - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING, mandlock_after); } @@ -936,6 +951,13 @@ xfs_access( /* + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 2 extents back from + * bmapi. + */ +#define SYMLINK_MAPS 2 + +/* * xfs_readlink * */ @@ -950,7 +972,7 @@ xfs_readlink( int count; xfs_off_t offset; int pathlen; - vnode_t *vp; + bhv_vnode_t *vp; int error = 0; xfs_mount_t *mp; int nmaps; @@ -1000,7 +1022,7 @@ xfs_readlink( nmaps = SYMLINK_MAPS; error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), - 0, NULL, 0, mval, &nmaps, NULL); + 0, NULL, 0, mval, &nmaps, NULL, NULL); if (error) { goto error_return; @@ -1208,8 +1230,8 @@ xfs_inactive_free_eofblocks( nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, - NULL, 0, &imap, &nimaps, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!error && (nimaps != 0) && @@ -1338,7 +1360,7 @@ xfs_inactive_symlink_rmt( nmaps = ARRAY_SIZE(mval); if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, - &free_list))) + &free_list, NULL))) goto error0; /* * Invalidate the block(s). @@ -1353,7 +1375,7 @@ xfs_inactive_symlink_rmt( * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done))) + &first_block, &free_list, NULL, &done))) goto error1; ASSERT(done); /* @@ -1469,9 +1491,6 @@ xfs_inactive_symlink_local( return 0; } -/* - * - */ STATIC int xfs_inactive_attrs( xfs_inode_t *ip, @@ -1524,16 +1543,16 @@ xfs_release( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; xfs_mount_t *mp; int error; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; - if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; - } /* If this is a read-only mount, don't do this (would generate I/O) */ if (vp->v_vfsp->vfs_flag & VFS_RDONLY) @@ -1545,8 +1564,6 @@ xfs_release( return 0; #endif - mp = ip->i_mount; - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || @@ -1579,8 +1596,8 @@ xfs_inactive( cred_t *credp) { xfs_inode_t *ip; - vnode_t *vp; - xfs_bmap_free_t free_list; + bhv_vnode_t *vp; + xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; xfs_trans_t *tp; @@ -1760,7 +1777,7 @@ xfs_inactive( cmn_err(CE_NOTE, "xfs_inactive: xfs_ifree() returned an error = %d on %s", error, mp->m_fsname); - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } else { @@ -1795,17 +1812,17 @@ xfs_inactive( STATIC int xfs_lookup( bhv_desc_t *dir_bdp, - vname_t *dentry, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vnode_t **vpp, int flags, - vnode_t *rdir, + bhv_vnode_t *rdir, cred_t *credp) { xfs_inode_t *dp, *ip; xfs_ino_t e_inum; int error; uint lock_mode; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; dir_vp = BHV_TO_VNODE(dir_bdp); vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); @@ -1832,15 +1849,15 @@ xfs_lookup( STATIC int xfs_create( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *name = VNAME(dentry); - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; xfs_inode_t *dp, *ip; - vnode_t *vp=NULL; + bhv_vnode_t *vp = NULL; xfs_trans_t *tp; xfs_mount_t *mp; xfs_dev_t rdev; @@ -1938,8 +1955,7 @@ xfs_create( if (error) goto error_return; - if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) + if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) goto error_return; rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, @@ -1970,9 +1986,9 @@ xfs_create( xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); dp_joined_to_trans = B_TRUE; - error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto abort_return; @@ -2026,7 +2042,7 @@ xfs_create( * Propagate the fact that the vnode changed after the * xfs_inode locks have been released. */ - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3); *vpp = vp; @@ -2107,7 +2123,7 @@ int xfs_rm_attempts; STATIC int xfs_lock_dir_and_entry( xfs_inode_t *dp, - vname_t *dentry, + bhv_vname_t *dentry, xfs_inode_t *ip) /* inode of entry 'name' */ { int attempts; @@ -2321,10 +2337,10 @@ int remove_which_error_return = 0; STATIC int xfs_remove( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; char *name = VNAME(dentry); xfs_inode_t *dp, *ip; xfs_trans_t *tp = NULL; @@ -2448,8 +2464,8 @@ xfs_remove( * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. */ XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, 0); + error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, 0); if (error) { ASSERT(error != ENOENT); REMOVE_DEBUG_TRACE(__LINE__); @@ -2511,7 +2527,7 @@ xfs_remove( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); + bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero); IRELE(ip); @@ -2564,8 +2580,8 @@ xfs_remove( STATIC int xfs_link( bhv_desc_t *target_dir_bdp, - vnode_t *src_vp, - vname_t *dentry, + bhv_vnode_t *src_vp, + bhv_vname_t *dentry, cred_t *credp) { xfs_inode_t *tdp, *sip; @@ -2577,7 +2593,7 @@ xfs_link( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *target_dir_vp; + bhv_vnode_t *target_dir_vp; int resblks; char *target_name = VNAME(dentry); int target_namelen; @@ -2668,13 +2684,12 @@ xfs_link( } if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, - target_namelen))) + (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) goto error_return; XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen, + error = xfs_dir_createname(tp, tdp, target_name, target_namelen, sip->i_ino, &first_block, &free_list, resblks); if (error) @@ -2734,15 +2749,15 @@ std_return: STATIC int xfs_mkdir( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *dir_name = VNAME(dentry); xfs_inode_t *dp; xfs_inode_t *cdp; /* inode of created dir */ - vnode_t *cvp; /* vnode of created dir */ + bhv_vnode_t *cvp; /* vnode of created dir */ xfs_trans_t *tp; xfs_mount_t *mp; int cancel_flags; @@ -2750,7 +2765,7 @@ xfs_mkdir( int committed; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; boolean_t dp_joined_to_trans; boolean_t created = B_FALSE; int dm_event_sent = 0; @@ -2840,7 +2855,7 @@ xfs_mkdir( goto error_return; if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen))) + (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) goto error_return; /* * create the directory inode. @@ -2867,9 +2882,9 @@ xfs_mkdir( XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen, - cdp->i_ino, &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto error1; @@ -2883,16 +2898,14 @@ xfs_mkdir( */ dp->i_gen++; - error = XFS_DIR_INIT(mp, tp, cdp, dp); - if (error) { + error = xfs_dir_init(tp, cdp, dp); + if (error) goto error2; - } cdp->i_gen = 1; error = xfs_bumplink(tp, dp); - if (error) { + if (error) goto error2; - } cvp = XFS_ITOV(cdp); @@ -2969,7 +2982,7 @@ std_return: STATIC int xfs_rmdir( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { char *name = VNAME(dentry); @@ -2982,7 +2995,7 @@ xfs_rmdir( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; int dm_di_mode = 0; int last_cdp_link; int namelen; @@ -3101,16 +3114,15 @@ xfs_rmdir( error = XFS_ERROR(ENOTEMPTY); goto error_return; } - if (!XFS_DIR_ISEMPTY(mp, cdp)) { + if (!xfs_dir_isempty(cdp)) { error = XFS_ERROR(ENOTEMPTY); goto error_return; } - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino, - &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3181,7 +3193,7 @@ xfs_rmdir( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); + bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link); IRELE(cdp); @@ -3209,8 +3221,6 @@ xfs_rmdir( /* - * xfs_readdir - * * Read dp's entries starting at uiop->uio_offset and translate them into * bufsize bytes worth of struct dirents starting at bufbase. */ @@ -3230,28 +3240,23 @@ xfs_readdir( (inst_t *)__return_address); dp = XFS_BHVTOI(dir_bdp); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); - } lock_mode = xfs_ilock_map_shared(dp); - error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); + error = xfs_dir_getdents(tp, dp, uiop, eofp); xfs_iunlock_map_shared(dp, lock_mode); return error; } -/* - * xfs_symlink - * - */ STATIC int xfs_symlink( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, + bhv_vname_t *dentry, + bhv_vattr_t *vap, char *target_path, - vnode_t **vpp, + bhv_vnode_t **vpp, cred_t *credp) { xfs_trans_t *tp; @@ -3263,7 +3268,7 @@ xfs_symlink( xfs_bmap_free_t free_list; xfs_fsblock_t first_block; boolean_t dp_joined_to_trans; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -3308,7 +3313,7 @@ xfs_symlink( int len, total; char *path; - for(total = 0, path = target_path; total < pathlen;) { + for (total = 0, path = target_path; total < pathlen;) { /* * Skip any slashes. */ @@ -3402,7 +3407,7 @@ xfs_symlink( * Check for ability to enter directory entry, if no space reserved. */ if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen))) + (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) goto error_return; /* * Initialize the bmap freelist prior to calling either @@ -3457,7 +3462,7 @@ xfs_symlink( error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, - &free_list); + &free_list, NULL); if (error) { goto error1; } @@ -3489,11 +3494,10 @@ xfs_symlink( /* * Create the directory entry for the symlink. */ - error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen, - ip->i_ino, &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -3541,7 +3545,7 @@ std_return: } if (!error) { - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ip); vp = XFS_ITOV(ip); @@ -3606,10 +3610,10 @@ xfs_fid2( int xfs_rwlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); if (VN_ISDIR(vp)) @@ -3637,10 +3641,10 @@ xfs_rwlock( void xfs_rwunlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); if (VN_ISDIR(vp)) @@ -3744,7 +3748,6 @@ xfs_inode_flush( return error; } - int xfs_set_dmattrs ( bhv_desc_t *bdp, @@ -3785,16 +3788,12 @@ xfs_set_dmattrs ( return error; } - -/* - * xfs_reclaim - */ STATIC int xfs_reclaim( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); @@ -3849,7 +3848,7 @@ xfs_finish_reclaim( int sync_mode) { xfs_ihash_t *ih = ip->i_hash; - vnode_t *vp = XFS_ITOV_NULL(ip); + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); int error; if (vp && VN_BAD(vp)) @@ -4116,10 +4115,10 @@ retry: * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bmapi(tp, ip, startoffset_fsb, + error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, allocatesize_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, - &free_list); + &free_list, NULL); if (error) { goto error0; } @@ -4199,8 +4198,8 @@ xfs_zero_remaining_bytes( for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, - &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -4259,7 +4258,7 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { - vnode_t *vp; + bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -4308,7 +4307,6 @@ xfs_free_file_space( return error; } - ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); if (attr_flags & ATTR_NOLOCK) need_iolock = 0; if (need_iolock) { @@ -4326,7 +4324,7 @@ xfs_free_file_space( if (VN_CACHED(vp) != 0) { xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, ctooff(offtoct(ioffset)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), + bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), -1, FI_REMAPF_LOCKED); } @@ -4338,8 +4336,8 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4353,8 +4351,8 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4426,9 +4424,9 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bunmapi(tp, ip, startoffset_fsb, + error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, endoffset_fsb - startoffset_fsb, - 0, 2, &firstfsb, &free_list, &done); + 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { goto error0; } @@ -4488,8 +4486,8 @@ xfs_change_file_space( xfs_off_t startoffset; xfs_off_t llen; xfs_trans_t *tp; - vattr_t va; - vnode_t *vp; + bhv_vattr_t va; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -4642,9 +4640,10 @@ xfs_change_file_space( return error; } -vnodeops_t xfs_vnodeops = { +bhv_vnodeops_t xfs_vnodeops = { BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), .vop_open = xfs_open, + .vop_close = xfs_close, .vop_read = xfs_read, #ifdef HAVE_SENDFILE .vop_sendfile = xfs_sendfile, |