diff options
author | Steve French <sfrench@us.ibm.com> | 2008-02-15 22:06:08 +0100 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2008-02-15 22:06:08 +0100 |
commit | 0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41 (patch) | |
tree | b80b1d344ec24cad28b057ef803cebac9434be01 /fs | |
parent | [CIFS] factoring out common code in get_inode_info functions (diff) | |
parent | Linux 2.6.25-rc2 (diff) | |
download | linux-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.tar.xz linux-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.zip |
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
389 files changed, 10005 insertions, 8244 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index b364da70ff28..dfebdbe7440e 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -175,7 +175,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) if (!wnames) return ERR_PTR(-ENOMEM); - for (d = dentry, i = n; i >= 0; i--, d = d->d_parent) + for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent) wnames[i] = (char *) d->d_name.name; clone = 1; @@ -183,7 +183,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) while (i < n) { l = min(n - i, P9_MAXWELEM); fid = p9_client_walk(fid, l, &wnames[i], clone); - if (!fid) { + if (IS_ERR(fid)) { kfree(wnames); return fid; } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index fbb12dadba83..9b0f0222e8bb 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -3,7 +3,7 @@ * * This file contains functions assisting in mapping VFS to 9P2000 * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * * This program is free software; you can redistribute it and/or modify @@ -31,7 +31,6 @@ #include <linux/idr.h> #include <net/9p/9p.h> #include <net/9p/transport.h> -#include <net/9p/conn.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" @@ -43,11 +42,11 @@ enum { /* Options that take integer arguments */ - Opt_debug, Opt_msize, Opt_dfltuid, Opt_dfltgid, Opt_afid, + Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, /* String options */ Opt_uname, Opt_remotename, Opt_trans, /* Options that take no arguments */ - Opt_legacy, Opt_nodevmap, + Opt_nodevmap, /* Cache options */ Opt_cache_loose, /* Access options */ @@ -58,14 +57,11 @@ enum { static match_table_t tokens = { {Opt_debug, "debug=%x"}, - {Opt_msize, "msize=%u"}, {Opt_dfltuid, "dfltuid=%u"}, {Opt_dfltgid, "dfltgid=%u"}, {Opt_afid, "afid=%u"}, {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, - {Opt_trans, "trans=%s"}, - {Opt_legacy, "noextend"}, {Opt_nodevmap, "nodevmap"}, 
{Opt_cache_loose, "cache=loose"}, {Opt_cache_loose, "loose"}, @@ -85,16 +81,14 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) char *options; substring_t args[MAX_OPT_ARGS]; char *p; - int option; - int ret; + int option = 0; char *s, *e; + int ret; /* setup defaults */ - v9ses->maxdata = 8192; v9ses->afid = ~0; v9ses->debug = 0; v9ses->cache = 0; - v9ses->trans = v9fs_default_trans(); if (!v9ses->options) return; @@ -106,7 +100,8 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) continue; token = match_token(p, tokens, args); if (token < Opt_uname) { - if ((ret = match_int(&args[0], &option)) < 0) { + ret = match_int(&args[0], &option); + if (ret < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; @@ -119,9 +114,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) p9_debug_level = option; #endif break; - case Opt_msize: - v9ses->maxdata = option; - break; + case Opt_dfltuid: v9ses->dfltuid = option; break; @@ -131,18 +124,12 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) case Opt_afid: v9ses->afid = option; break; - case Opt_trans: - v9ses->trans = v9fs_match_trans(&args[0]); - break; case Opt_uname: match_strcpy(v9ses->uname, &args[0]); break; case Opt_remotename: match_strcpy(v9ses->aname, &args[0]); break; - case Opt_legacy: - v9ses->flags &= ~V9FS_EXTENDED; - break; case Opt_nodevmap: v9ses->nodev = 1; break; @@ -185,7 +172,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { int retval = -EINVAL; - struct p9_trans *trans = NULL; struct p9_fid *fid; v9ses->uname = __getname(); @@ -207,24 +193,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->options = kstrdup(data, GFP_KERNEL); v9fs_parse_options(v9ses); - if (v9ses->trans == NULL) { - retval = -EPROTONOSUPPORT; - P9_DPRINTK(P9_DEBUG_ERROR, - "No transport defined or default transport\n"); - goto error; - } - - trans = 
v9ses->trans->create(dev_name, v9ses->options); - if (IS_ERR(trans)) { - retval = PTR_ERR(trans); - trans = NULL; - goto error; - } - if ((v9ses->maxdata+P9_IOHDRSZ) > v9ses->trans->maxsize) - v9ses->maxdata = v9ses->trans->maxsize-P9_IOHDRSZ; - - v9ses->clnt = p9_client_create(trans, v9ses->maxdata+P9_IOHDRSZ, - v9fs_extended(v9ses)); + v9ses->clnt = p9_client_create(dev_name, v9ses->options); if (IS_ERR(v9ses->clnt)) { retval = PTR_ERR(v9ses->clnt); @@ -236,6 +205,8 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, if (!v9ses->clnt->dotu) v9ses->flags &= ~V9FS_EXTENDED; + v9ses->maxdata = v9ses->clnt->msize; + /* for legacy mode, fall back to V9FS_ACCESS_ANY */ if (!v9fs_extended(v9ses) && ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index db4b4193f2e2..7d3a1018db52 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -1,7 +1,7 @@ /* * V9FS definitions. * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * * This program is free software; you can redistribute it and/or modify @@ -28,7 +28,6 @@ struct v9fs_session_info { /* options */ - unsigned int maxdata; unsigned char flags; /* session flags */ unsigned char nodev; /* set to 1 if no disable device mapping */ unsigned short debug; /* debug level */ @@ -38,10 +37,10 @@ struct v9fs_session_info { char *options; /* copy of mount options */ char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ + unsigned int maxdata; /* max data for client interface */ unsigned int dfltuid; /* default uid/muid for legacy support */ unsigned int dfltgid; /* default gid for legacy support */ u32 uid; /* if ACCESS_SINGLE, the uid that has access */ - struct p9_trans_module *trans; /* 9p transport */ struct p9_client *clnt; /* 9p client */ struct dentry *debugfs_dir; }; diff --git 
a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index ba4b1caa9c43..a616fff8906d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -184,7 +184,7 @@ static const struct file_operations v9fs_cached_file_operations = { .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, - .mmap = generic_file_mmap, + .mmap = generic_file_readonly_mmap, }; const struct file_operations v9fs_file_operations = { @@ -194,5 +194,5 @@ const struct file_operations v9fs_file_operations = { .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, - .mmap = generic_file_mmap, + .mmap = generic_file_readonly_mmap, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 23581bcb599b..6a28842052ea 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -77,6 +77,8 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) res |= P9_DMSETUID; if ((mode & S_ISGID) == S_ISGID) res |= P9_DMSETGID; + if ((mode & S_ISVTX) == S_ISVTX) + res |= P9_DMSETVTX; if ((mode & P9_DMLINK)) res |= P9_DMLINK; } @@ -119,6 +121,9 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) if ((mode & P9_DMSETGID) == P9_DMSETGID) res |= S_ISGID; + + if ((mode & P9_DMSETVTX) == P9_DMSETVTX) + res |= S_ISVTX; } return res; @@ -568,7 +573,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) - return ERR_PTR(PTR_ERR(dfid)); + return ERR_CAST(dfid); name = (char *) dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); diff --git a/fs/Kconfig b/fs/Kconfig index ea5b35947623..d7312825592b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -463,40 +463,18 @@ config OCFS2_DEBUG_FS this option for debugging only as it is likely to decrease performance of the filesystem. -config MINIX_FS - tristate "Minix fs support" - help - Minix is a simple operating system used in many classes about OS's. 
- The minix file system (method to organize files on a hard disk - partition or a floppy disk) was the original file system for Linux, - but has been superseded by the second extended file system ext2fs. - You don't want to use the minix file system on your hard disk - because of certain built-in restrictions, but it is sometimes found - on older Linux floppy disks. This option will enlarge your kernel - by about 28 KB. If unsure, say N. - - To compile this file system support as a module, choose M here: the - module will be called minix. Note that the file system of your root - partition (the one containing the directory /) cannot be compiled as - a module. - -config ROMFS_FS - tristate "ROM file system support" - ---help--- - This is a very small read-only file system mainly intended for - initial ram disks of installation disks, but it could be used for - other read-only media as well. Read - <file:Documentation/filesystems/romfs.txt> for details. +endif # BLOCK - To compile this file system support as a module, choose M here: the - module will be called romfs. Note that the file system of your - root partition (the one containing the directory /) cannot be a - module. - - If you don't know whether you need it, then you don't need it: - answer N. +config DNOTIFY + bool "Dnotify support" + default y + help + Dnotify is a directory-based per-fd file change notification system + that uses signals to communicate events to user-space. There exist + superior alternatives, but some applications may still rely on + dnotify. -endif + If unsure, say Y. config INOTIFY bool "Inotify file change notification support" @@ -577,17 +555,6 @@ config QUOTACTL depends on XFS_QUOTA || QUOTA default y -config DNOTIFY - bool "Dnotify support" - default y - help - Dnotify is a directory-based per-fd file change notification system - that uses signals to communicate events to user-space. There exist - superior alternatives, but some applications may still rely on - dnotify. 
- - If unsure, say Y. - config AUTOFS_FS tristate "Kernel automounter support" help @@ -713,7 +680,7 @@ config UDF_NLS depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) endmenu -endif +endif # BLOCK if BLOCK menu "DOS/FAT/NT Filesystems" @@ -896,7 +863,7 @@ config NTFS_RW It is perfectly safe to say N here. endmenu -endif +endif # BLOCK menu "Pseudo filesystems" @@ -1417,6 +1384,24 @@ config VXFS_FS To compile this as a module, choose M here: the module will be called freevxfs. If unsure, say N. +config MINIX_FS + tristate "Minix file system support" + depends on BLOCK + help + Minix is a simple operating system used in many classes about OS's. + The minix file system (method to organize files on a hard disk + partition or a floppy disk) was the original file system for Linux, + but has been superseded by the second extended file system ext2fs. + You don't want to use the minix file system on your hard disk + because of certain built-in restrictions, but it is sometimes found + on older Linux floppy disks. This option will enlarge your kernel + by about 28 KB. If unsure, say N. + + To compile this file system support as a module, choose M here: the + module will be called minix. Note that the file system of your root + partition (the one containing the directory /) cannot be compiled as + a module. + config HPFS_FS tristate "OS/2 HPFS file system support" @@ -1434,7 +1419,6 @@ config HPFS_FS module will be called hpfs. If unsure, say N. - config QNX4FS_FS tristate "QNX4 file system support (read only)" depends on BLOCK @@ -1461,6 +1445,22 @@ config QNX4FS_RW It's currently broken, so for now: answer N. +config ROMFS_FS + tristate "ROM file system support" + depends on BLOCK + ---help--- + This is a very small read-only file system mainly intended for + initial ram disks of installation disks, but it could be used for + other read-only media as well. Read + <file:Documentation/filesystems/romfs.txt> for details. 
+ + To compile this file system support as a module, choose M here: the + module will be called romfs. Note that the file system of your + root partition (the one containing the directory /) cannot be a + module. + + If you don't know whether you need it, then you don't need it: + answer N. config SYSV_FS @@ -1501,7 +1501,6 @@ config SYSV_FS If you haven't heard about all of this before, it's safe to say N. - config UFS_FS tristate "UFS file system support (read only)" depends on BLOCK @@ -1779,12 +1778,9 @@ config SUNRPC_GSS tristate config SUNRPC_XPRT_RDMA - tristate "RDMA transport for sunrpc (EXPERIMENTAL)" + tristate depends on SUNRPC && INFINIBAND && EXPERIMENTAL - default m - help - Adds a client RPC transport for supporting kernel NFS over RDMA - mounts, including Infiniband and iWARP. Experimental. + default SUNRPC && INFINIBAND config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 7c3d5f923da1..b5c3b6114add 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -61,7 +61,8 @@ config BINFMT_SHARED_FLAT config BINFMT_AOUT tristate "Kernel support for a.out and ECOFF binaries" - depends on X86_32 || ALPHA || ARM || M68K || SPARC32 + depends on ARCH_SUPPORTS_AOUT && \ + (X86_32 || ALPHA || ARM || M68K || SPARC32) ---help--- A.out (Assembler.OUTput) is a set of formats for libraries and executables used in the earliest versions of UNIX. 
Linux used diff --git a/fs/adfs/super.c b/fs/adfs/super.c index b36695ae5c2e..9e421eeb672b 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -20,6 +20,8 @@ #include <linux/vfs.h> #include <linux/parser.h> #include <linux/bitops.h> +#include <linux/mount.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -30,6 +32,9 @@ #include "dir_f.h" #include "dir_fplus.h" +#define ADFS_DEFAULT_OWNER_MASK S_IRWXU +#define ADFS_DEFAULT_OTHER_MASK (S_IRWXG | S_IRWXO) + void __adfs_error(struct super_block *sb, const char *function, const char *fmt, ...) { char error_buf[128]; @@ -134,6 +139,22 @@ static void adfs_put_super(struct super_block *sb) sb->s_fs_info = NULL; } +static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) +{ + struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb); + + if (asb->s_uid != 0) + seq_printf(seq, ",uid=%u", asb->s_uid); + if (asb->s_gid != 0) + seq_printf(seq, ",gid=%u", asb->s_gid); + if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK) + seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); + if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) + seq_printf(seq, ",othmask=%o", asb->s_other_mask); + + return 0; +} + enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; static match_table_t tokens = { @@ -259,6 +280,7 @@ static const struct super_operations adfs_sops = { .put_super = adfs_put_super, .statfs = adfs_statfs, .remount_fs = adfs_remount, + .show_options = adfs_show_options, }; static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_discrecord *dr) @@ -344,8 +366,8 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) /* set default options */ asb->s_uid = 0; asb->s_gid = 0; - asb->s_owner_mask = S_IRWXU; - asb->s_other_mask = S_IRWXG | S_IRWXO; + asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; + asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; if (parse_options(sb, data)) goto error; diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 
232c69493683..d5bd497ab9cb 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -174,7 +174,8 @@ extern void affs_put_inode(struct inode *inode); extern void affs_drop_inode(struct inode *inode); extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); -extern void affs_read_inode(struct inode *inode); +extern struct inode *affs_iget(struct super_block *sb, + unsigned long ino); extern int affs_write_inode(struct inode *inode, int); extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index f4de4b98004f..805573005de6 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -170,9 +170,11 @@ affs_remove_link(struct dentry *dentry) if (!link_bh) goto done; - dir = iget(sb, be32_to_cpu(AFFS_TAIL(sb, link_bh)->parent)); - if (!dir) + dir = affs_iget(sb, be32_to_cpu(AFFS_TAIL(sb, link_bh)->parent)); + if (IS_ERR(dir)) { + retval = PTR_ERR(dir); goto done; + } affs_lock_dir(dir); affs_fix_dcache(dentry, link_ino); diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 4609a6c13fe9..27fe6cbe43ae 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -15,20 +15,25 @@ extern const struct inode_operations affs_symlink_inode_operations; extern struct timezone sys_tz; -void -affs_read_inode(struct inode *inode) +struct inode *affs_iget(struct super_block *sb, unsigned long ino) { - struct super_block *sb = inode->i_sb; struct affs_sb_info *sbi = AFFS_SB(sb); struct buffer_head *bh; struct affs_head *head; struct affs_tail *tail; + struct inode *inode; u32 block; u32 size; u32 prot; u16 id; - pr_debug("AFFS: read_inode(%lu)\n",inode->i_ino); + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + pr_debug("AFFS: affs_iget(%lu)\n", inode->i_ino); block = inode->i_ino; bh = affs_bread(sb, block); @@ -154,12 +159,13 @@ affs_read_inode(struct inode *inode) 
sys_tz.tz_minuteswest * 60; inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_atime.tv_nsec = 0; affs_brelse(bh); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); affs_brelse(bh); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } int diff --git a/fs/affs/namei.c b/fs/affs/namei.c index a42143ca0169..2218f1ee71ce 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -208,9 +208,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); affs_unlock_dir(dir); - if (IS_ERR(bh)) { - return ERR_PTR(PTR_ERR(bh)); - } + if (IS_ERR(bh)) + return ERR_CAST(bh); if (bh) { u32 ino = bh->b_blocknr; @@ -223,10 +222,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) ino = be32_to_cpu(AFFS_TAIL(sb, bh)->original); } affs_brelse(bh); - inode = iget(sb, ino); - if (!inode) { - return ERR_PTR(-EACCES); - } + inode = affs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); } dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; d_add(dentry, inode); diff --git a/fs/affs/super.c b/fs/affs/super.c index b53e5d0ec65c..d2dc047cb479 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -113,7 +113,6 @@ static void destroy_inodecache(void) static const struct super_operations affs_sops = { .alloc_inode = affs_alloc_inode, .destroy_inode = affs_destroy_inode, - .read_inode = affs_read_inode, .write_inode = affs_write_inode, .put_inode = affs_put_inode, .drop_inode = affs_drop_inode, @@ -123,6 +122,7 @@ static const struct super_operations affs_sops = { .write_super = affs_write_super, .statfs = affs_statfs, .remount_fs = affs_remount, + .show_options = generic_show_options, }; enum { @@ -271,6 +271,9 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) unsigned long mount_flags; int tmp_flags; /* fix remount prototype... 
*/ u8 sig[4]; + int ret = -EINVAL; + + save_mount_options(sb, data); pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options"); @@ -444,7 +447,12 @@ got_root: /* set up enough so that it can read an inode */ - root_inode = iget(sb, root_block); + root_inode = affs_iget(sb, root_block); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); + goto out_error_noinode; + } + sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) { printk(KERN_ERR "AFFS: Get root inode failed\n"); @@ -461,12 +469,13 @@ got_root: out_error: if (root_inode) iput(root_inode); +out_error_noinode: kfree(sbi->s_bitmap); affs_brelse(root_bh); kfree(sbi->s_prefix); kfree(sbi); sb->s_fs_info = NULL; - return -EINVAL; + return ret; } static int @@ -481,14 +490,21 @@ affs_remount(struct super_block *sb, int *flags, char *data) int root_block; unsigned long mount_flags; int res = 0; + char *new_opts = kstrdup(data, GFP_KERNEL); pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); *flags |= MS_NODIRATIME; - if (!parse_options(data,&uid,&gid,&mode,&reserved,&root_block, - &blocksize,&sbi->s_prefix,sbi->s_volume,&mount_flags)) + if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, + &blocksize, &sbi->s_prefix, sbi->s_volume, + &mount_flags)) { + kfree(new_opts); return -EINVAL; + } + kfree(sb->s_options); + sb->s_options = new_opts; + sbi->s_flags = mount_flags; sbi->s_mode = mode; sbi->s_uid = uid; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0cc3597c1197..b58af8f18bc4 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -512,7 +512,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { _leave(" = %ld [key]", PTR_ERR(key)); - return ERR_PTR(PTR_ERR(key)); + return ERR_CAST(key); } ret = afs_validate(vnode, key); @@ -540,7 +540,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", 
PTR_ERR(inode)); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } dentry->d_op = &afs_fs_dentry_operations; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 84750c8e9f95..08db82e1343a 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -196,10 +196,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* failure */ bad_inode: - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); - + iget_failed(inode); _leave(" = %d [bad]", ret); return ERR_PTR(ret); } diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 5ce43b63c60e..a3510b8ba3e7 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -218,16 +218,16 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) _enter("%p{%s},{%s:%p{%s},}", dentry, dentry->d_name.name, - nd->mnt->mnt_devname, + nd->path.mnt->mnt_devname, dentry, - nd->dentry->d_name.name); + nd->path.dentry->d_name.name); - dput(nd->dentry); - nd->dentry = dget(dentry); + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); - newmnt = afs_mntpt_do_automount(nd->dentry); + newmnt = afs_mntpt_do_automount(nd->path.dentry); if (IS_ERR(newmnt)) { - path_release(nd); + path_put(&nd->path); return (void *)newmnt; } @@ -235,17 +235,16 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); switch (err) { case 0: - dput(nd->dentry); - mntput(nd->mnt); - nd->mnt = newmnt; - nd->dentry = dget(newmnt->mnt_root); + path_put(&nd->path); + nd->path.mnt = newmnt; + nd->path.dentry = dget(newmnt->mnt_root); schedule_delayed_work(&afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); break; case -EBUSY: /* someone else made a mount here whilst we were busy */ - while (d_mountpoint(nd->dentry) && - follow_down(&nd->mnt, &nd->dentry)) + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) ; err = 0; default: diff --git a/fs/afs/security.c b/fs/afs/security.c index 
566fe712c682..3bcbeceba1bb 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -95,7 +95,7 @@ static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode, auth_inode = afs_iget(vnode->vfs_inode.i_sb, key, &vnode->status.parent, NULL, NULL); if (IS_ERR(auth_inode)) - return ERR_PTR(PTR_ERR(auth_inode)); + return ERR_CAST(auth_inode); } auth_vnode = AFS_FS_I(auth_inode); @@ -287,7 +287,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, int afs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct afs_vnode *vnode = AFS_FS_I(inode); - afs_access_t access; + afs_access_t uninitialized_var(access); struct key *key; int ret; diff --git a/fs/afs/super.c b/fs/afs/super.c index 4b2558c42213..36bbce45f44b 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -52,6 +52,7 @@ static const struct super_operations afs_super_ops = { .clear_inode = afs_clear_inode, .umount_begin = afs_umount_begin, .put_super = afs_put_super, + .show_options = generic_show_options, }; static struct kmem_cache *afs_inode_cachep; @@ -357,6 +358,7 @@ static int afs_get_sb(struct file_system_type *fs_type, struct super_block *sb; struct afs_volume *vol; struct key *key; + char *new_opts = kstrdup(options, GFP_KERNEL); int ret; _enter(",,%s,%p", dev_name, options); @@ -408,9 +410,11 @@ static int afs_get_sb(struct file_system_type *fs_type, deactivate_super(sb); goto error; } + sb->s_options = new_opts; sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); + kfree(new_opts); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); } @@ -424,6 +428,7 @@ error: afs_put_volume(params.volume); afs_put_cell(params.cell); key_put(params.key); + kfree(new_opts); _leave(" = %d", ret); return ret; } @@ -317,7 +317,7 @@ out: /* wait_on_sync_kiocb: * Waits on the given sync kiocb to complete. 
*/ -ssize_t fastcall wait_on_sync_kiocb(struct kiocb *iocb) +ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { while (iocb->ki_users) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -336,7 +336,7 @@ ssize_t fastcall wait_on_sync_kiocb(struct kiocb *iocb) * go away, they will call put_ioctx and release any pinned memory * associated with the request (held via struct page * references). */ -void fastcall exit_aio(struct mm_struct *mm) +void exit_aio(struct mm_struct *mm) { struct kioctx *ctx = mm->ioctx_list; mm->ioctx_list = NULL; @@ -365,7 +365,7 @@ void fastcall exit_aio(struct mm_struct *mm) * Called when the last user of an aio context has gone away, * and the struct needs to be freed. */ -void fastcall __put_ioctx(struct kioctx *ctx) +void __put_ioctx(struct kioctx *ctx) { unsigned nr_events = ctx->max_reqs; @@ -397,8 +397,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) * This prevents races between the aio code path referencing the * req (after submitting it) and aio_complete() freeing the req. */ -static struct kiocb *__aio_get_req(struct kioctx *ctx); -static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) +static struct kiocb *__aio_get_req(struct kioctx *ctx) { struct kiocb *req = NULL; struct aio_ring *ring; @@ -533,7 +532,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * Returns true if this put was the last user of the kiocb, * false if the request is still in use. */ -int fastcall aio_put_req(struct kiocb *req) +int aio_put_req(struct kiocb *req) { struct kioctx *ctx = req->ki_ctx; int ret; @@ -893,7 +892,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb) * The retry is usually executed by aio workqueue * threads (See aio_kick_handler). */ -void fastcall kick_iocb(struct kiocb *iocb) +void kick_iocb(struct kiocb *iocb) { /* sync iocbs are easy: they can only ever be executing from a * single context. */ @@ -912,7 +911,7 @@ EXPORT_SYMBOL(kick_iocb); * Returns true if this is the last user of the request. 
The * only other user of the request can be the cancellation code. */ -int fastcall aio_complete(struct kiocb *iocb, long res, long res2) +int aio_complete(struct kiocb *iocb, long res, long res2) { struct kioctx *ctx = iocb->ki_ctx; struct aio_ring_info *info; @@ -1330,6 +1329,10 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) opcode = IOCB_CMD_PWRITEV; } + /* This matches the pread()/pwrite() logic */ + if (iocb->ki_pos < 0) + return -EINVAL; + do { ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], iocb->ki_nr_segs - iocb->ki_cur_seg, @@ -1348,6 +1351,13 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; + /* If we managed to write some out we return that, rather than + * the eventual error. */ + if (opcode == IOCB_CMD_PWRITEV + && ret < 0 && ret != -EIOCBQUEUED && ret != -EIOCBRETRY + && iocb->ki_nbytes - iocb->ki_left) + ret = iocb->ki_nbytes - iocb->ki_left; + return ret; } @@ -1523,7 +1533,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, return 1; } -int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, +int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb) { struct kiocb *req; diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 8b4cca3c4705..901a3e67ec45 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -150,6 +150,7 @@ extern const struct file_operations autofs_root_operations; int autofs_fill_super(struct super_block *, void *, int); void autofs_kill_sb(struct super_block *sb); +struct inode *autofs_iget(struct super_block *, unsigned long); /* Queue management functions */ diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 45f5992a0957..dda510d31f84 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -52,11 +52,9 @@ out_kill_sb: kill_anon_super(sb); } -static void autofs_read_inode(struct inode *inode); - static const struct super_operations 
autofs_sops = { - .read_inode = autofs_read_inode, .statfs = simple_statfs, + .show_options = generic_show_options, }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; @@ -143,6 +141,8 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) int minproto, maxproto; pid_t pgid; + save_mount_options(s, data); + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) goto fail_unlock; @@ -164,7 +164,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) s->s_time_gran = 1; sbi->sb = s; - root_inode = iget(s, AUTOFS_ROOT_INO); + root_inode = autofs_iget(s, AUTOFS_ROOT_INO); + if (IS_ERR(root_inode)) + goto fail_free; root = d_alloc_root(root_inode); pipe = NULL; @@ -230,11 +232,17 @@ fail_unlock: return -EINVAL; } -static void autofs_read_inode(struct inode *inode) +struct inode *autofs_iget(struct super_block *sb, unsigned long ino) { - ino_t ino = inode->i_ino; unsigned int n; - struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); + struct autofs_sb_info *sbi = autofs_sbi(sb); + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; /* Initialize to the default case (stub directory) */ @@ -250,7 +258,7 @@ static void autofs_read_inode(struct inode *inode) inode->i_op = &autofs_root_inode_operations; inode->i_fop = &autofs_root_operations; inode->i_uid = inode->i_gid = 0; /* Changed in read_super */ - return; + goto done; } inode->i_uid = inode->i_sb->s_root->d_inode->i_uid; @@ -263,7 +271,7 @@ static void autofs_read_inode(struct inode *inode) n = ino - AUTOFS_FIRST_SYMLINK; if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) { printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino); - return; + goto done; } inode->i_op = &autofs_symlink_inode_operations; @@ -275,4 +283,8 @@ static void autofs_read_inode(struct inode *inode) inode->i_size = sl->len; inode->i_nlink = 1; } + +done: + 
unlock_new_inode(inode); + return inode; } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 5efff3c0d886..8aacade56956 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -114,8 +114,8 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str dentry->d_time = (unsigned long) ent; if (!dentry->d_inode) { - inode = iget(sb, ent->ino); - if (!inode) { + inode = autofs_iget(sb, ent->ino); + if (IS_ERR(inode)) { /* Failed, but leave pending for next time */ return 1; } @@ -274,6 +274,7 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c unsigned int n; int slsize; struct autofs_symlink *sl; + struct inode *inode; DPRINTK(("autofs_root_symlink: %s <- ", symname)); autofs_say(dentry->d_name.name,dentry->d_name.len); @@ -331,7 +332,12 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c ent->dentry = NULL; /* We don't keep the dentry for symlinks */ autofs_hash_insert(dh,ent); - d_instantiate(dentry, iget(dir->i_sb,ent->ino)); + + inode = autofs_iget(dir->i_sb, ent->ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, inode); unlock_kernel(); return 0; } @@ -428,6 +434,7 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); struct autofs_dirhash *dh = &sbi->dirhash; struct autofs_dir_ent *ent; + struct inode *inode; ino_t ino; lock_kernel(); @@ -469,7 +476,14 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) autofs_hash_insert(dh,ent); inc_nlink(dir); - d_instantiate(dentry, iget(dir->i_sb,ino)); + + inode = autofs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { + drop_nlink(dir); + return PTR_ERR(inode); + } + + d_instantiate(dentry, inode); unlock_kernel(); return 0; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7f05d6ccdb13..2fdcf5e1d236 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -176,11 +176,16 @@ 
out_kill_sb: static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) { struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb); + struct inode *root_inode = mnt->mnt_sb->s_root->d_inode; if (!sbi) return 0; seq_printf(m, ",fd=%d", sbi->pipefd); + if (root_inode->i_uid != 0) + seq_printf(m, ",uid=%u", root_inode->i_uid); + if (root_inode->i_gid != 0) + seq_printf(m, ",gid=%u", root_inode->i_gid); seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2bbcc8151dc3..a54a946a50ae 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -368,7 +368,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) * so we don't need to follow the mount. */ if (d_mountpoint(dentry)) { - if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { + if (!autofs4_follow_mount(&nd->path.mnt, + &nd->path.dentry)) { status = -ENOENT; goto out_error; } @@ -382,7 +383,7 @@ done: return NULL; out_error: - path_release(nd); + path_put(&nd->path); return ERR_PTR(status); } diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 521ff7caadbd..f1c2ea8342f5 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -359,3 +359,17 @@ int is_bad_inode(struct inode *inode) } EXPORT_SYMBOL(is_bad_inode); + +/** + * iget_failed - Mark an under-construction inode as dead and release it + * @inode: The inode to discard + * + * Mark an under-construction inode as dead and release it. 
+ */ +void iget_failed(struct inode *inode) +{ + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); +} +EXPORT_SYMBOL(iget_failed); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index b28a20e61b80..82123ff3e1dd 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -35,7 +35,7 @@ static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *); -static void befs_read_inode(struct inode *ino); +static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); static int befs_init_inodecache(void); @@ -52,12 +52,12 @@ static int befs_statfs(struct dentry *, struct kstatfs *); static int parse_options(char *, befs_mount_options *); static const struct super_operations befs_sops = { - .read_inode = befs_read_inode, /* initialize & read inode */ .alloc_inode = befs_alloc_inode, /* allocate a new inode */ .destroy_inode = befs_destroy_inode, /* deallocate an inode */ .put_super = befs_put_super, /* uninit super */ .statfs = befs_statfs, /* statfs */ .remount_fs = befs_remount, + .show_options = generic_show_options, }; /* slab cache for befs_inode_info objects */ @@ -198,9 +198,9 @@ befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return ERR_PTR(-ENODATA); } - inode = iget(dir->i_sb, (ino_t) offset); - if (!inode) - return ERR_PTR(-EACCES); + inode = befs_iget(dir->i_sb, (ino_t) offset); + if (IS_ERR(inode)) + return ERR_CAST(inode); d_add(dentry, inode); @@ -296,17 +296,23 @@ static void init_once(struct kmem_cache *cachep, void *foo) inode_init_once(&bi->vfs_inode); } -static void -befs_read_inode(struct inode *inode) +static struct inode *befs_iget(struct 
super_block *sb, unsigned long ino) { struct buffer_head *bh = NULL; befs_inode *raw_inode = NULL; - struct super_block *sb = inode->i_sb; befs_sb_info *befs_sb = BEFS_SB(sb); befs_inode_info *befs_ino = NULL; + struct inode *inode; + long ret = -EIO; + + befs_debug(sb, "---> befs_read_inode() " "inode = %lu", ino); - befs_debug(sb, "---> befs_read_inode() " "inode = %lu", inode->i_ino); + inode = iget_locked(sb, ino); + if (IS_ERR(inode)) + return inode; + if (!(inode->i_state & I_NEW)) + return inode; befs_ino = BEFS_I(inode); @@ -402,15 +408,16 @@ befs_read_inode(struct inode *inode) brelse(bh); befs_debug(sb, "<--- befs_read_inode()"); - return; + unlock_new_inode(inode); + return inode; unacquire_bh: brelse(bh); unacquire_none: - make_bad_inode(inode); + iget_failed(inode); befs_debug(sb, "<--- befs_read_inode() - Bad inode"); - return; + return ERR_PTR(ret); } /* Initialize the inode cache. Called at fs setup. @@ -752,10 +759,12 @@ befs_fill_super(struct super_block *sb, void *data, int silent) befs_sb_info *befs_sb; befs_super_block *disk_sb; struct inode *root; - + long ret = -EINVAL; const unsigned long sb_block = 0; const off_t x86_sb_off = 512; + save_mount_options(sb, data); + sb->s_fs_info = kmalloc(sizeof (*befs_sb), GFP_KERNEL); if (sb->s_fs_info == NULL) { printk(KERN_ERR @@ -833,7 +842,11 @@ befs_fill_super(struct super_block *sb, void *data, int silent) /* Set real blocksize of fs */ sb_set_blocksize(sb, (ulong) befs_sb->block_size); sb->s_op = (struct super_operations *) &befs_sops; - root = iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); + root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto unacquire_priv_sbp; + } sb->s_root = d_alloc_root(root); if (!sb->s_root) { iput(root); @@ -868,7 +881,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) unacquire_none: sb->s_fs_info = NULL; - return -EINVAL; + return ret; } static int diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h 
index ac7a8b1d6c3a..71faf4d23908 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -44,6 +44,8 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode) #define printf(format, args...) \ printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) +/* inode.c */ +extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); /* file.c */ extern const struct inode_operations bfs_file_inops; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 1fd056d0fc3d..034950cb3cbe 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -148,10 +148,10 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, if (bh) { unsigned long ino = (unsigned long)le16_to_cpu(de->ino); brelse(bh); - inode = iget(dir->i_sb, ino); - if (!inode) { + inode = bfs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index a64a71d444f5..8db623838b50 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -32,17 +32,22 @@ MODULE_LICENSE("GPL"); void dump_imap(const char *prefix, struct super_block *s); -static void bfs_read_inode(struct inode *inode) +struct inode *bfs_iget(struct super_block *sb, unsigned long ino) { - unsigned long ino = inode->i_ino; struct bfs_inode *di; + struct inode *inode; struct buffer_head *bh; int block, off; + inode = iget_locked(sb, ino); + if (IS_ERR(inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); - make_bad_inode(inode); - return; + goto error; } block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; @@ -50,8 +55,7 @@ static void bfs_read_inode(struct inode *inode) if (!bh) { printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); - make_bad_inode(inode); - return; + goto error; } off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; @@ 
-85,6 +89,12 @@ static void bfs_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = 0; brelse(bh); + unlock_new_inode(inode); + return inode; + +error: + iget_failed(inode); + return ERR_PTR(-EIO); } static int bfs_write_inode(struct inode *inode, int unused) @@ -276,7 +286,6 @@ static void destroy_inodecache(void) static const struct super_operations bfs_sops = { .alloc_inode = bfs_alloc_inode, .destroy_inode = bfs_destroy_inode, - .read_inode = bfs_read_inode, .write_inode = bfs_write_inode, .delete_inode = bfs_delete_inode, .put_super = bfs_put_super, @@ -312,6 +321,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct inode *inode; unsigned i, imap_len; struct bfs_sb_info *info; + long ret = -EINVAL; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -346,14 +356,16 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) set_bit(i, info->si_imap); s->s_op = &bfs_sops; - inode = iget(s, BFS_ROOT_INO); - if (!inode) { + inode = bfs_iget(s, BFS_ROOT_INO); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); kfree(info->si_imap); goto out; } s->s_root = d_alloc_root(inode); if (!s->s_root) { iput(inode); + ret = -ENOMEM; kfree(info->si_imap); goto out; } @@ -404,7 +416,7 @@ out: brelse(bh); kfree(info); s->s_fs_info = NULL; - return -EINVAL; + return ret; } static int bfs_get_sb(struct file_system_type *fs_type, diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 7596e1e94cde..a1bb2244cac7 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -28,6 +28,7 @@ #include <asm/system.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/a.out-core.h> static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); @@ -115,10 +116,10 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); #ifndef __sparc__ - dump.u_ar0 = (void 
*)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); + dump.u_ar0 = offsetof(struct user, regs); #endif dump.signal = signr; - dump_thread(regs, &dump); + aout_dump_thread(regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4628c42ca892..41a958a7585e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -134,8 +134,7 @@ static int padzero(unsigned long elf_bss) static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, - int interp_aout, unsigned long load_addr, - unsigned long interp_load_addr) + unsigned long load_addr, unsigned long interp_load_addr) { unsigned long p = bprm->p; int argc = bprm->argc; @@ -223,12 +222,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, sp = STACK_ADD(p, ei_index); - items = (argc + 1) + (envc + 1); - if (interp_aout) { - items += 3; /* a.out interpreters require argv & envp too */ - } else { - items += 1; /* ELF interpreters only put argc on the stack */ - } + items = (argc + 1) + (envc + 1) + 1; bprm->p = STACK_ROUND(sp, items); /* Point sp at the lowest address on the stack */ @@ -251,16 +245,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, /* Now, let's put argc (and argv, envp if appropriate) on the stack */ if (__put_user(argc, sp++)) return -EFAULT; - if (interp_aout) { - argv = sp + 2; - envp = argv + argc + 1; - if (__put_user((elf_addr_t)(unsigned long)argv, sp++) || - __put_user((elf_addr_t)(unsigned long)envp, sp++)) - return -EFAULT; - } else { - argv = sp; - envp = argv + argc + 1; - } + argv = sp; + envp = argv + argc + 1; /* Populate argv and envp */ p = current->mm->arg_end = current->mm->arg_start; @@ -513,59 +499,12 @@ out: return error; } -static unsigned long load_aout_interp(struct exec *interp_ex, - struct file *interpreter) -{ - unsigned long text_data, elf_entry = ~0UL; - char __user * addr; - 
loff_t offset; - - current->mm->end_code = interp_ex->a_text; - text_data = interp_ex->a_text + interp_ex->a_data; - current->mm->end_data = text_data; - current->mm->brk = interp_ex->a_bss + text_data; - - switch (N_MAGIC(*interp_ex)) { - case OMAGIC: - offset = 32; - addr = (char __user *)0; - break; - case ZMAGIC: - case QMAGIC: - offset = N_TXTOFF(*interp_ex); - addr = (char __user *)N_TXTADDR(*interp_ex); - break; - default: - goto out; - } - - down_write(&current->mm->mmap_sem); - do_brk(0, text_data); - up_write(&current->mm->mmap_sem); - if (!interpreter->f_op || !interpreter->f_op->read) - goto out; - if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0) - goto out; - flush_icache_range((unsigned long)addr, - (unsigned long)addr + text_data); - - down_write(&current->mm->mmap_sem); - do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), - interp_ex->a_bss); - up_write(&current->mm->mmap_sem); - elf_entry = interp_ex->a_entry; - -out: - return elf_entry; -} - /* * These are the functions used to load ELF style executables and shared * libraries. There is no binary dependent code anywhere else. 
*/ #define INTERPRETER_NONE 0 -#define INTERPRETER_AOUT 1 #define INTERPRETER_ELF 2 #ifndef STACK_RND_MASK @@ -594,7 +533,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) unsigned long load_addr = 0, load_bias = 0; int load_addr_set = 0; char * elf_interpreter = NULL; - unsigned int interpreter_type = INTERPRETER_NONE; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; @@ -605,7 +543,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) unsigned long interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; unsigned long reloc_func_desc = 0; - char passed_fileno[6]; struct files_struct *files; int executable_stack = EXSTACK_DEFAULT; unsigned long def_flags = 0; @@ -774,59 +711,18 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Some simple consistency checks for the interpreter */ if (elf_interpreter) { - static int warn; - interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; - - /* Now figure out which format our binary is */ - if ((N_MAGIC(loc->interp_ex) != OMAGIC) && - (N_MAGIC(loc->interp_ex) != ZMAGIC) && - (N_MAGIC(loc->interp_ex) != QMAGIC)) - interpreter_type = INTERPRETER_ELF; - - if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - interpreter_type &= ~INTERPRETER_ELF; - - if (interpreter_type == INTERPRETER_AOUT && warn < 10) { - printk(KERN_WARNING "a.out ELF interpreter %s is " - "deprecated and will not be supported " - "after Linux 2.6.25\n", elf_interpreter); - warn++; - } - retval = -ELIBBAD; - if (!interpreter_type) + /* Not an ELF interpreter */ + if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) goto out_free_dentry; - - /* Make sure only one type was selected */ - if ((interpreter_type & INTERPRETER_ELF) && - interpreter_type != INTERPRETER_ELF) { - // FIXME - ratelimit this before re-enabling - // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n"); - 
interpreter_type = INTERPRETER_ELF; - } /* Verify the interpreter has a valid arch */ - if ((interpreter_type == INTERPRETER_ELF) && - !elf_check_arch(&loc->interp_elf_ex)) + if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; } else { /* Executables without an interpreter also need a personality */ SET_PERSONALITY(loc->elf_ex, 0); } - /* OK, we are done with that, now set up the arg stuff, - and then start this sucker up */ - if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) { - char *passed_p = passed_fileno; - sprintf(passed_fileno, "%d", elf_exec_fileno); - - if (elf_interpreter) { - retval = copy_strings_kernel(1, &passed_p, bprm); - if (retval) - goto out_free_dentry; - bprm->argc++; - } - } - /* Flush all traces of the currently running executable */ retval = flush_old_exec(bprm); if (retval) @@ -1004,24 +900,19 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } if (elf_interpreter) { - if (interpreter_type == INTERPRETER_AOUT) { - elf_entry = load_aout_interp(&loc->interp_ex, - interpreter); - } else { - unsigned long uninitialized_var(interp_map_addr); - - elf_entry = load_elf_interp(&loc->interp_elf_ex, - interpreter, - &interp_map_addr, - load_bias); - if (!IS_ERR((void *)elf_entry)) { - /* - * load_elf_interp() returns relocation - * adjustment - */ - interp_load_addr = elf_entry; - elf_entry += loc->interp_elf_ex.e_entry; - } + unsigned long uninitialized_var(interp_map_addr); + + elf_entry = load_elf_interp(&loc->interp_elf_ex, + interpreter, + &interp_map_addr, + load_bias); + if (!IS_ERR((void *)elf_entry)) { + /* + * load_elf_interp() returns relocation + * adjustment + */ + interp_load_addr = elf_entry; + elf_entry += loc->interp_elf_ex.e_entry; } if (BAD_ADDR(elf_entry)) { force_sig(SIGSEGV, current); @@ -1045,8 +936,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) kfree(elf_phdata); - if (interpreter_type != INTERPRETER_AOUT) - sys_close(elf_exec_fileno); + 
sys_close(elf_exec_fileno); set_binfmt(&elf_format); @@ -1061,15 +951,12 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; retval = create_elf_tables(bprm, &loc->elf_ex, - (interpreter_type == INTERPRETER_AOUT), load_addr, interp_load_addr); if (retval < 0) { send_sig(SIGKILL, current, 0); goto out; } /* N.B. passed_fileno might not be initialized? */ - if (interpreter_type == INTERPRETER_AOUT) - current->mm->arg_start += strlen(passed_fileno) + 1; current->mm->end_code = end_code; current->mm->start_code = start_code; current->mm->start_data = start_data; @@ -1077,7 +964,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk - if (current->flags & PF_RANDOMIZE) + if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); #endif diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 33764fd6db66..0498b181dd52 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -20,7 +20,6 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/a.out.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/string.h> @@ -444,12 +443,12 @@ static int load_flat_file(struct linux_binprm * bprm, if (strncmp(hdr->magic, "bFLT", 4)) { /* + * Previously, here was a printk to tell people + * "BINFMT_FLAT: bad header magic". + * But for the kernel which also use ELF FD-PIC format, this + * error message is confusing. * because a lot of people do not manage to produce good - * flat binaries, we leave this printk to help them realise - * the problem. 
We only print the error if its not a script file */ - if (strncmp(hdr->magic, "#!", 2)) - printk("BINFMT_FLAT: bad header magic\n"); ret = -ENOEXEC; goto err; } diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 9208c41209f9..14c63527c762 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -29,7 +29,6 @@ #include <linux/personality.h> #include <linux/init.h> -#include <asm/a.out.h> #include <asm/uaccess.h> #include <asm/pgtable.h> diff --git a/fs/block_dev.c b/fs/block_dev.c index e48a630ae266..67fe72ce6ac7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -534,7 +534,6 @@ void __init bdev_cache_init(void) if (err) panic("Cannot register bdev pseudo-fs"); bd_mnt = kern_mount(&bd_type); - err = PTR_ERR(bd_mnt); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ @@ -1398,19 +1397,19 @@ struct block_device *lookup_bdev(const char *path) if (error) return ERR_PTR(error); - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.mnt->mnt_flags & MNT_NODEV) + if (nd.path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_release(&nd); + path_put(&nd.path); return bdev; fail: bdev = ERR_PTR(error); diff --git a/fs/buffer.c b/fs/buffer.c index 826baf4f04bc..3ebccf4aa7e3 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -67,14 +67,14 @@ static int sync_buffer(void *word) return 0; } -void fastcall __lock_buffer(struct buffer_head *bh) +void __lock_buffer(struct buffer_head *bh) { wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_buffer); -void fastcall unlock_buffer(struct buffer_head *bh) +void unlock_buffer(struct buffer_head *bh) { smp_mb__before_clear_bit(); clear_buffer_locked(bh); @@ -678,7 +678,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) } else { 
BUG_ON(mapping->assoc_mapping != buffer_mapping); } - if (list_empty(&bh->b_assoc_buffers)) { + if (!bh->b_assoc_map) { spin_lock(&buffer_mapping->private_lock); list_move_tail(&bh->b_assoc_buffers, &mapping->private_list); @@ -794,6 +794,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; struct list_head tmp; + struct address_space *mapping; int err = 0, err2; INIT_LIST_HEAD(&tmp); @@ -801,9 +802,14 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_lock(lock); while (!list_empty(list)) { bh = BH_ENTRY(list->next); + mapping = bh->b_assoc_map; __remove_assoc_queue(bh); + /* Avoid race with mark_buffer_dirty_inode() which does + * a lockless check and we rely on seeing the dirty bit */ + smp_mb(); if (buffer_dirty(bh) || buffer_locked(bh)) { list_add(&bh->b_assoc_buffers, &tmp); + bh->b_assoc_map = mapping; if (buffer_dirty(bh)) { get_bh(bh); spin_unlock(lock); @@ -822,8 +828,17 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) while (!list_empty(&tmp)) { bh = BH_ENTRY(tmp.prev); - list_del_init(&bh->b_assoc_buffers); get_bh(bh); + mapping = bh->b_assoc_map; + __remove_assoc_queue(bh); + /* Avoid race with mark_buffer_dirty_inode() which does + * a lockless check and we rely on seeing the dirty bit */ + smp_mb(); + if (buffer_dirty(bh)) { + list_add(&bh->b_assoc_buffers, + &bh->b_assoc_map->private_list); + bh->b_assoc_map = mapping; + } spin_unlock(lock); wait_on_buffer(bh); if (!buffer_uptodate(bh)) @@ -1164,7 +1179,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, * mapping->tree_lock and the global inode_lock. 
*/ -void fastcall mark_buffer_dirty(struct buffer_head *bh) +void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) @@ -1195,7 +1210,7 @@ void __brelse(struct buffer_head * buf) void __bforget(struct buffer_head *bh) { clear_buffer_dirty(bh); - if (!list_empty(&bh->b_assoc_buffers)) { + if (bh->b_assoc_map) { struct address_space *buffer_mapping = bh->b_page->mapping; spin_lock(&buffer_mapping->private_lock); @@ -1436,6 +1451,7 @@ void invalidate_bh_lrus(void) { on_each_cpu(invalidate_bh_lru, NULL, 1, 1); } +EXPORT_SYMBOL_GPL(invalidate_bh_lrus); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) @@ -3021,7 +3037,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) do { struct buffer_head *next = bh->b_this_page; - if (!list_empty(&bh->b_assoc_buffers)) + if (bh->b_assoc_map) __remove_assoc_queue(bh); bh = next; } while (bh != head); diff --git a/fs/char_dev.c b/fs/char_dev.c index 2c7a8b5b4598..038674aa88a7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -357,7 +357,7 @@ void cdev_put(struct cdev *p) /* * Called every time a character special file is opened */ -int chrdev_open(struct inode * inode, struct file * filp) +static int chrdev_open(struct inode *inode, struct file *filp) { struct cdev *p; struct cdev *new = NULL; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index dd3bba4134b5..7f8838253410 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -259,18 +259,18 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, int err; mntget(newmnt); - err = do_add_mount(newmnt, nd, nd->mnt->mnt_flags, mntlist); + err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); switch (err) { case 0: - dput(nd->dentry); - mntput(nd->mnt); - nd->mnt = newmnt; - nd->dentry = dget(newmnt->mnt_root); + dput(nd->path.dentry); + mntput(nd->path.mnt); + nd->path.mnt = newmnt; + 
nd->path.dentry = dget(newmnt->mnt_root); break; case -EBUSY: /* someone else made a mount here whilst we were busy */ - while (d_mountpoint(nd->dentry) && - follow_down(&nd->mnt, &nd->dentry)) + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) ; err = 0; default: @@ -307,8 +307,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) xid = GetXid(); - dput(nd->dentry); - nd->dentry = dget(dentry); + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); cifs_sb = CIFS_SB(dentry->d_inode->i_sb); ses = cifs_sb->tcon->ses; @@ -340,7 +340,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) rc = -EINVAL; goto out_err; } - mnt = cifs_dfs_do_refmount(nd->mnt, nd->dentry, + mnt = cifs_dfs_do_refmount(nd->path.mnt, + nd->path.dentry, referrals[i].node_name); cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __FUNCTION__, @@ -357,7 +358,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) if (IS_ERR(mnt)) goto out_err; - nd->mnt->mnt_flags |= MNT_SHRINKABLE; + nd->path.mnt->mnt_flags |= MNT_SHRINKABLE; rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); out: @@ -367,7 +368,7 @@ out: cFYI(1, ("leaving %s" , __FUNCTION__)); return ERR_PTR(rc); out_err: - path_release(nd); + path_put(&nd->path); goto out; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 29bbf655b99c..a04b17e5a9d0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -147,10 +147,11 @@ cifs_read_super(struct super_block *sb, void *data, #endif sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ - inode = iget(sb, ROOT_I); + inode = cifs_iget(sb, ROOT_I); - if (!inode) { - rc = -ENOMEM; + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + inode = NULL; goto out_no_root; } @@ -517,7 +518,6 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data) } static const struct super_operations cifs_super_ops = { - .read_inode = 
cifs_read_inode, .put_super = cifs_put_super, .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 195b14de5567..68978306c3ca 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -44,6 +44,7 @@ extern void cifs_read_inode(struct inode *); /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; +extern struct inode *cifs_iget(struct super_block *, unsigned long); extern int cifs_create(struct inode *, struct dentry *, int, struct nameidata *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e7cd392a796a..1d8aa0385ef7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -566,10 +566,18 @@ static const struct inode_operations cifs_ipc_inode_ops = { }; /* gets root inode */ -void cifs_read_inode(struct inode *inode) +struct inode *cifs_iget(struct super_block *sb, unsigned long ino) { - int xid, rc; + int xid; struct cifs_sb_info *cifs_sb; + struct inode *inode; + long rc; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; cifs_sb = CIFS_SB(inode->i_sb); xid = GetXid(); @@ -586,10 +594,18 @@ void cifs_read_inode(struct inode *inode) inode->i_fop = &simple_dir_operations; inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; + _FreeXid(xid); + iget_failed(inode); + return ERR_PTR(rc); } - /* can not call macro FreeXid here since in a void func */ + unlock_new_inode(inode); + + /* can not call macro FreeXid here since in a void func + * TODO: This is no longer true + */ _FreeXid(xid); + return inode; } int cifs_unlink(struct inode *inode, struct dentry *direntry) diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 2bf3026adc80..c21a1f552a63 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -75,12 +75,12 @@ static int coda_pioctl(struct inode * inode, struct file * filp, if ( error ) { return error; } else { - 
target_inode = nd.dentry->d_inode; + target_inode = nd.path.dentry->d_inode; } /* return if it is not a Coda inode */ if ( target_inode->i_sb != inode->i_sb ) { - path_release(&nd); + path_put(&nd.path); return -EINVAL; } @@ -89,7 +89,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp, error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); - path_release(&nd); + path_put(&nd.path); return error; } diff --git a/fs/compat.c b/fs/compat.c index 69baca5ad608..2ce4456aad30 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -241,10 +241,10 @@ asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs error = user_path_walk(path, &nd); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.dentry, &tmp); + error = vfs_statfs(nd.path.dentry, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); - path_release(&nd); + path_put(&nd.path); } return error; } @@ -309,10 +309,10 @@ asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, s error = user_path_walk(path, &nd); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.dentry, &tmp); + error = vfs_statfs(nd.path.dentry, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); - path_release(&nd); + path_put(&nd.path); } return error; } @@ -702,9 +702,6 @@ static int do_nfs4_super_data_conv(void *raw_data) real->flags = raw->flags; real->version = raw->version; } - else { - return -EINVAL; - } return 0; } @@ -2083,51 +2080,6 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) #ifdef CONFIG_EPOLL -#ifdef CONFIG_HAS_COMPAT_EPOLL_EVENT -asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd, - struct compat_epoll_event __user *event) -{ - long err = 0; - struct compat_epoll_event user; - struct epoll_event __user *kernel = NULL; - - if (event) { - if (copy_from_user(&user, event, sizeof(user))) - return -EFAULT; - kernel = compat_alloc_user_space(sizeof(struct epoll_event)); - err |= __put_user(user.events, 
&kernel->events); - err |= __put_user(user.data, &kernel->data); - } - - return err ? err : sys_epoll_ctl(epfd, op, fd, kernel); -} - - -asmlinkage long compat_sys_epoll_wait(int epfd, - struct compat_epoll_event __user *events, - int maxevents, int timeout) -{ - long i, ret, err = 0; - struct epoll_event __user *kbuf; - struct epoll_event ev; - - if ((maxevents <= 0) || - (maxevents > (INT_MAX / sizeof(struct epoll_event)))) - return -EINVAL; - kbuf = compat_alloc_user_space(sizeof(struct epoll_event) * maxevents); - ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); - for (i = 0; i < ret; i++) { - err |= __get_user(ev.events, &kbuf[i].events); - err |= __get_user(ev.data, &kbuf[i].data); - err |= __put_user(ev.events, &events->events); - err |= __put_user_unaligned(ev.data, &events->data); - events++; - } - - return err ? -EFAULT: ret; -} -#endif /* CONFIG_HAS_COMPAT_EPOLL_EVENT */ - #ifdef TIF_RESTORE_SIGMASK asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events, @@ -2153,11 +2105,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } -#ifdef CONFIG_HAS_COMPAT_EPOLL_EVENT - err = compat_sys_epoll_wait(epfd, events, maxevents, timeout); -#else err = sys_epoll_wait(epfd, events, maxevents, timeout); -#endif /* * If we changed the signal mask, we need to restore the original one. 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ffdc022cae64..c6e72aebd16b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -78,7 +78,6 @@ #include <linux/mii.h> #include <linux/if_bonding.h> #include <linux/watchdog.h> -#include <linux/dm-ioctl.h> #include <linux/soundcard.h> #include <linux/lp.h> @@ -1993,39 +1992,6 @@ COMPATIBLE_IOCTL(STOP_ARRAY_RO) COMPATIBLE_IOCTL(RESTART_ARRAY_RW) COMPATIBLE_IOCTL(GET_BITMAP_FILE) ULONG_IOCTL(SET_BITMAP_FILE) -/* DM */ -COMPATIBLE_IOCTL(DM_VERSION_32) -COMPATIBLE_IOCTL(DM_REMOVE_ALL_32) -COMPATIBLE_IOCTL(DM_LIST_DEVICES_32) -COMPATIBLE_IOCTL(DM_DEV_CREATE_32) -COMPATIBLE_IOCTL(DM_DEV_REMOVE_32) -COMPATIBLE_IOCTL(DM_DEV_RENAME_32) -COMPATIBLE_IOCTL(DM_DEV_SUSPEND_32) -COMPATIBLE_IOCTL(DM_DEV_STATUS_32) -COMPATIBLE_IOCTL(DM_DEV_WAIT_32) -COMPATIBLE_IOCTL(DM_TABLE_LOAD_32) -COMPATIBLE_IOCTL(DM_TABLE_CLEAR_32) -COMPATIBLE_IOCTL(DM_TABLE_DEPS_32) -COMPATIBLE_IOCTL(DM_TABLE_STATUS_32) -COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32) -COMPATIBLE_IOCTL(DM_TARGET_MSG_32) -COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32) -COMPATIBLE_IOCTL(DM_VERSION) -COMPATIBLE_IOCTL(DM_REMOVE_ALL) -COMPATIBLE_IOCTL(DM_LIST_DEVICES) -COMPATIBLE_IOCTL(DM_DEV_CREATE) -COMPATIBLE_IOCTL(DM_DEV_REMOVE) -COMPATIBLE_IOCTL(DM_DEV_RENAME) -COMPATIBLE_IOCTL(DM_DEV_SUSPEND) -COMPATIBLE_IOCTL(DM_DEV_STATUS) -COMPATIBLE_IOCTL(DM_DEV_WAIT) -COMPATIBLE_IOCTL(DM_TABLE_LOAD) -COMPATIBLE_IOCTL(DM_TABLE_CLEAR) -COMPATIBLE_IOCTL(DM_TABLE_DEPS) -COMPATIBLE_IOCTL(DM_TABLE_STATUS) -COMPATIBLE_IOCTL(DM_LIST_VERSIONS) -COMPATIBLE_IOCTL(DM_TARGET_MSG) -COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY) /* Big K */ COMPATIBLE_IOCTL(PIO_FONT) COMPATIBLE_IOCTL(GIO_FONT) @@ -2887,7 +2853,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd, /* find the name of the device. 
*/ path = (char *)__get_free_page(GFP_KERNEL); if (path) { - fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE); + fn = d_path(&filp->f_path, path, PAGE_SIZE); if (IS_ERR(fn)) fn = "?"; } @@ -2986,7 +2952,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, } do_ioctl: - error = vfs_ioctl(filp, fd, cmd, arg); + error = do_vfs_ioctl(filp, fd, cmd, arg); out_fput: fput_light(filp, fput_needed); out: diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 22700d2857da..78929ea84ff2 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -99,11 +99,11 @@ static int get_target(const char *symname, struct nameidata *nd, ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd); if (!ret) { - if (nd->dentry->d_sb == configfs_sb) { - *target = configfs_get_config_item(nd->dentry); + if (nd->path.dentry->d_sb == configfs_sb) { + *target = configfs_get_config_item(nd->path.dentry); if (!*target) { ret = -ENOENT; - path_release(nd); + path_put(&nd->path); } } else ret = -EPERM; @@ -141,7 +141,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna ret = create_link(parent_item, target_item, dentry); config_item_put(target_item); - path_release(&nd); + path_put(&nd.path); out_put: config_item_put(parent_item); diff --git a/fs/dcache.c b/fs/dcache.c index d9ca1e5ceb92..43455776711e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -89,12 +89,20 @@ static void d_free(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ - if (dentry->d_hash.pprev == NULL) + if (hlist_unhashed(&dentry->d_hash)) __d_free(dentry); else call_rcu(&dentry->d_u.d_rcu, d_callback); } +static void dentry_lru_remove(struct dentry *dentry) +{ + if (!list_empty(&dentry->d_lru)) { + list_del_init(&dentry->d_lru); + dentry_stat.nr_unused--; + } +} + /* * Release the dentry's inode, using the filesystem * 
d_iput() operation if defined. @@ -211,13 +219,7 @@ repeat: unhash_it: __d_drop(dentry); kill_it: - /* If dentry was on d_lru list - * delete it from there - */ - if (!list_empty(&dentry->d_lru)) { - list_del(&dentry->d_lru); - dentry_stat.nr_unused--; - } + dentry_lru_remove(dentry); dentry = d_kill(dentry); if (dentry) goto repeat; @@ -285,10 +287,7 @@ int d_invalidate(struct dentry * dentry) static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); - if (!list_empty(&dentry->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&dentry->d_lru); - } + dentry_lru_remove(dentry); return dentry; } @@ -404,10 +403,7 @@ static void prune_one_dentry(struct dentry * dentry) if (dentry->d_op && dentry->d_op->d_delete) dentry->d_op->d_delete(dentry); - if (!list_empty(&dentry->d_lru)) { - list_del(&dentry->d_lru); - dentry_stat.nr_unused--; - } + dentry_lru_remove(dentry); __d_drop(dentry); dentry = d_kill(dentry); spin_lock(&dcache_lock); @@ -596,10 +592,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* detach this root from the system */ spin_lock(&dcache_lock); - if (!list_empty(&dentry->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&dentry->d_lru); - } + dentry_lru_remove(dentry); __d_drop(dentry); spin_unlock(&dcache_lock); @@ -613,11 +606,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - if (!list_empty(&loop->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&loop->d_lru); - } - + dentry_lru_remove(loop); __d_drop(loop); cond_resched_lock(&dcache_lock); } @@ -799,10 +788,7 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - if (!list_empty(&dentry->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&dentry->d_lru); - } + dentry_lru_remove(dentry); /* * move only zero ref count dentries to the end * of the unused list for 
prune_dcache @@ -1408,9 +1394,6 @@ void d_delete(struct dentry * dentry) if (atomic_read(&dentry->d_count) == 1) { dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); - - /* remove this and other inotify debug checks after 2.6.18 */ - dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; return; } @@ -1779,9 +1762,8 @@ shouldnt_be_hashed: * * "buflen" should be positive. Caller holds the dcache_lock. */ -static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, - struct dentry *root, struct vfsmount *rootmnt, - char *buffer, int buflen) +static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct path *root, char *buffer, int buflen) { char * end = buffer+buflen; char * retval; @@ -1806,7 +1788,7 @@ static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, for (;;) { struct dentry * parent; - if (dentry == root && vfsmnt == rootmnt) + if (dentry == root->dentry && vfsmnt == root->mnt) break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ @@ -1847,13 +1829,23 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } -/* write full pathname into buffer and return start of pathname */ -char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, - char *buf, int buflen) +/** + * d_path - return the path of a dentry + * @path: path to report + * @buf: buffer to return value in + * @buflen: buffer length + * + * Convert a dentry into an ASCII path name. If the entry has been deleted + * the string " (deleted)" is appended. Note that this is ambiguous. + * + * Returns the buffer or an error code if the path was too long. + * + * "buflen" should be positive. Caller holds the dcache_lock. + */ +char *d_path(struct path *path, char *buf, int buflen) { char *res; - struct vfsmount *rootmnt; - struct dentry *root; + struct path root; /* * We have various synthetic filesystems that never get mounted. 
On @@ -1862,18 +1854,17 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: */ - if (dentry->d_op && dentry->d_op->d_dname) - return dentry->d_op->d_dname(dentry, buf, buflen); + if (path->dentry->d_op && path->dentry->d_op->d_dname) + return path->dentry->d_op->d_dname(path->dentry, buf, buflen); read_lock(&current->fs->lock); - rootmnt = mntget(current->fs->rootmnt); - root = dget(current->fs->root); + root = current->fs->root; + path_get(&current->fs->root); read_unlock(&current->fs->lock); spin_lock(&dcache_lock); - res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); + res = __d_path(path->dentry, path->mnt, &root, buf, buflen); spin_unlock(&dcache_lock); - dput(root); - mntput(rootmnt); + path_put(&root); return res; } @@ -1919,28 +1910,27 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, asmlinkage long sys_getcwd(char __user *buf, unsigned long size) { int error; - struct vfsmount *pwdmnt, *rootmnt; - struct dentry *pwd, *root; + struct path pwd, root; char *page = (char *) __get_free_page(GFP_USER); if (!page) return -ENOMEM; read_lock(&current->fs->lock); - pwdmnt = mntget(current->fs->pwdmnt); - pwd = dget(current->fs->pwd); - rootmnt = mntget(current->fs->rootmnt); - root = dget(current->fs->root); + pwd = current->fs->pwd; + path_get(&current->fs->pwd); + root = current->fs->root; + path_get(&current->fs->root); read_unlock(&current->fs->lock); error = -ENOENT; /* Has the current directory has been unlinked?
*/ spin_lock(&dcache_lock); - if (pwd->d_parent == pwd || !d_unhashed(pwd)) { + if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { unsigned long len; char * cwd; - cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE); + cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE); spin_unlock(&dcache_lock); error = PTR_ERR(cwd); @@ -1958,10 +1948,8 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) spin_unlock(&dcache_lock); out: - dput(pwd); - mntput(pwdmnt); - dput(root); - mntput(rootmnt); + path_put(&pwd); + path_put(&root); free_page((unsigned long) page); return error; } diff --git a/fs/dcookies.c b/fs/dcookies.c index 792cbf55fa95..855d4b1d619a 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -24,6 +24,7 @@ #include <linux/errno.h> #include <linux/dcookies.h> #include <linux/mutex.h> +#include <linux/path.h> #include <asm/uaccess.h> /* The dcookies are allocated from a kmem_cache and @@ -31,8 +32,7 @@ * code here is particularly performance critical */ struct dcookie_struct { - struct dentry * dentry; - struct vfsmount * vfsmnt; + struct path path; struct list_head hash_list; }; @@ -51,7 +51,7 @@ static inline int is_live(void) /* The dentry is locked, its address will do for the cookie */ static inline unsigned long dcookie_value(struct dcookie_struct * dcs) { - return (unsigned long)dcs->dentry; + return (unsigned long)dcs->path.dentry; } @@ -89,19 +89,17 @@ static void hash_dcookie(struct dcookie_struct * dcs) } -static struct dcookie_struct * alloc_dcookie(struct dentry * dentry, - struct vfsmount * vfsmnt) +static struct dcookie_struct *alloc_dcookie(struct path *path) { - struct dcookie_struct * dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL); + struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache, + GFP_KERNEL); if (!dcs) return NULL; - dentry->d_cookie = dcs; - - dcs->dentry = dget(dentry); - dcs->vfsmnt = mntget(vfsmnt); + path->dentry->d_cookie = dcs; + dcs->path = *path; + path_get(path); 
hash_dcookie(dcs); - return dcs; } @@ -109,8 +107,7 @@ static struct dcookie_struct * alloc_dcookie(struct dentry * dentry, /* This is the main kernel-side routine that retrieves the cookie * value for a dentry/vfsmnt pair. */ -int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt, - unsigned long * cookie) +int get_dcookie(struct path *path, unsigned long *cookie) { int err = 0; struct dcookie_struct * dcs; @@ -122,10 +119,10 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt, goto out; } - dcs = dentry->d_cookie; + dcs = path->dentry->d_cookie; if (!dcs) - dcs = alloc_dcookie(dentry, vfsmnt); + dcs = alloc_dcookie(path); if (!dcs) { err = -ENOMEM; @@ -174,7 +171,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len) goto out; /* FIXME: (deleted) ? */ - path = d_path(dcs->dentry, dcs->vfsmnt, kbuf, PAGE_SIZE); + path = d_path(&dcs->path, kbuf, PAGE_SIZE); if (IS_ERR(path)) { err = PTR_ERR(path); @@ -254,9 +251,8 @@ out_kmem: static void free_dcookie(struct dcookie_struct * dcs) { - dcs->dentry->d_cookie = NULL; - dput(dcs->dentry); - mntput(dcs->vfsmnt); + dcs->path.dentry->d_cookie = NULL; + path_put(&dcs->path); kmem_cache_free(dcookie_cache, dcs); } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index fa6b7f7ff914..fddffe4851f5 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -56,13 +56,15 @@ const struct inode_operations debugfs_link_operations = { .follow_link = debugfs_follow_link, }; -static void debugfs_u8_set(void *data, u64 val) +static int debugfs_u8_set(void *data, u64 val) { *(u8 *)data = val; + return 0; } -static u64 debugfs_u8_get(void *data) +static int debugfs_u8_get(void *data, u64 *val) { - return *(u8 *)data; + *val = *(u8 *)data; + return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); @@ -97,13 +99,15 @@ struct dentry *debugfs_create_u8(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u8); -static void debugfs_u16_set(void 
*data, u64 val) +static int debugfs_u16_set(void *data, u64 val) { *(u16 *)data = val; + return 0; } -static u64 debugfs_u16_get(void *data) +static int debugfs_u16_get(void *data, u64 *val) { - return *(u16 *)data; + *val = *(u16 *)data; + return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); @@ -138,13 +142,15 @@ struct dentry *debugfs_create_u16(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u16); -static void debugfs_u32_set(void *data, u64 val) +static int debugfs_u32_set(void *data, u64 val) { *(u32 *)data = val; + return 0; } -static u64 debugfs_u32_get(void *data) +static int debugfs_u32_get(void *data, u64 *val) { - return *(u32 *)data; + *val = *(u32 *)data; + return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); @@ -179,14 +185,16 @@ struct dentry *debugfs_create_u32(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u32); -static void debugfs_u64_set(void *data, u64 val) +static int debugfs_u64_set(void *data, u64 val) { *(u64 *)data = val; + return 0; } -static u64 debugfs_u64_get(void *data) +static int debugfs_u64_get(void *data, u64 *val) { - return *(u64 *)data; + *val = *(u64 *)data; + return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 06ef9a255c76..f120e1207874 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -20,9 +20,12 @@ #include <linux/devpts_fs.h> #include <linux/parser.h> #include <linux/fsnotify.h> +#include <linux/seq_file.h> #define DEVPTS_SUPER_MAGIC 0x1cd1 +#define DEVPTS_DEFAULT_MODE 0600 + static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -32,7 +35,7 @@ static struct { uid_t uid; gid_t gid; umode_t mode; -} config = {.mode = 0600}; +} config = {.mode = DEVPTS_DEFAULT_MODE}; enum { Opt_uid, Opt_gid, Opt_mode, @@ -54,7 +57,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) 
config.setgid = 0; config.uid = 0; config.gid = 0; - config.mode = 0600; + config.mode = DEVPTS_DEFAULT_MODE; while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -81,7 +84,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) case Opt_mode: if (match_octal(&args[0], &option)) return -EINVAL; - config.mode = option & ~S_IFMT; + config.mode = option & S_IALLUGO; break; default: printk(KERN_ERR "devpts: called with bogus options\n"); @@ -92,9 +95,21 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) return 0; } +static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + if (config.setuid) + seq_printf(seq, ",uid=%u", config.uid); + if (config.setgid) + seq_printf(seq, ",gid=%u", config.gid); + seq_printf(seq, ",mode=%03o", config.mode); + + return 0; +} + static const struct super_operations devpts_sops = { .statfs = simple_statfs, .remount_fs = devpts_remount, + .show_options = devpts_show_options, }; static int diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 6308122890ca..8bf31e3fbf01 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -39,7 +39,6 @@ void dlm_add_ast(struct dlm_lkb *lkb, int type) dlm_user_add_ast(lkb, type); return; } - DLM_ASSERT(lkb->lkb_astaddr != DLM_FAKE_USER_AST, dlm_print_lkb(lkb);); spin_lock(&ast_queue_lock); if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { @@ -58,8 +57,8 @@ static void process_asts(void) struct dlm_ls *ls = NULL; struct dlm_rsb *r = NULL; struct dlm_lkb *lkb; - void (*cast) (long param); - void (*bast) (long param, int mode); + void (*cast) (void *astparam); + void (*bast) (void *astparam, int mode); int type = 0, found, bmode; for (;;) { @@ -83,8 +82,8 @@ static void process_asts(void) if (!found) break; - cast = lkb->lkb_astaddr; - bast = lkb->lkb_bastaddr; + cast = lkb->lkb_astfn; + bast = lkb->lkb_bastfn; bmode = lkb->lkb_bastmode; if ((type & AST_COMP) && cast) diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 
2f8e3c81bc19..c3ad1dff3b25 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -604,7 +604,7 @@ static struct clusters clusters_root = { }, }; -int dlm_config_init(void) +int __init dlm_config_init(void) { config_group_init(&clusters_root.subsys.su_group); mutex_init(&clusters_root.subsys.su_mutex); diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 12c3bfd5e660..8fc24f4507a3 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -162,14 +162,12 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r) { - struct dlm_user_args *ua; unsigned int waiting = 0; uint64_t xid = 0; if (lkb->lkb_flags & DLM_IFL_USER) { - ua = (struct dlm_user_args *) lkb->lkb_astparam; - if (ua) - xid = ua->xid; + if (lkb->lkb_ua) + xid = lkb->lkb_ua->xid; } if (lkb->lkb_timestamp) @@ -543,7 +541,7 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_locks_dentry); } -int dlm_register_debugfs(void) +int __init dlm_register_debugfs(void) { mutex_init(&debug_buf_lock); dlm_root = debugfs_create_dir("dlm", NULL); diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index ff97ba924333..85defeb64df4 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -220,6 +220,7 @@ int dlm_recover_directory(struct dlm_ls *ls) last_len = 0; for (;;) { + int left; error = dlm_recovery_stopped(ls); if (error) goto out_free; @@ -235,12 +236,21 @@ int dlm_recover_directory(struct dlm_ls *ls) * pick namelen/name pairs out of received buffer */ - b = ls->ls_recover_buf + sizeof(struct dlm_rcom); + b = ls->ls_recover_buf->rc_buf; + left = ls->ls_recover_buf->rc_header.h_length; + left -= sizeof(struct dlm_rcom); for (;;) { - memcpy(&namelen, b, sizeof(uint16_t)); - namelen = be16_to_cpu(namelen); - b += sizeof(uint16_t); + __be16 v; + + error = -EINVAL; + if (left < sizeof(__be16)) + goto out_free; + + memcpy(&v, b, sizeof(__be16)); + namelen = be16_to_cpu(v); + b += sizeof(__be16); + left -= 
sizeof(__be16); /* namelen of 0xFFFFF marks end of names for this node; namelen of 0 marks end of the @@ -251,6 +261,12 @@ int dlm_recover_directory(struct dlm_ls *ls) if (!namelen) break; + if (namelen > left) + goto out_free; + + if (namelen > DLM_RESNAME_MAXLEN) + goto out_free; + error = -ENOMEM; de = get_free_de(ls, namelen); if (!de) @@ -262,6 +278,7 @@ int dlm_recover_directory(struct dlm_ls *ls) memcpy(de->name, b, namelen); memcpy(last_name, b, namelen); b += namelen; + left -= namelen; add_entry_to_hash(ls, de); count++; @@ -302,6 +319,9 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, write_unlock(&ls->ls_dirtbl[bucket].lock); + if (namelen > DLM_RESNAME_MAXLEN) + return -EINVAL; + de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); if (!de) return -ENOMEM; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index ec61bbaf25df..d30ea8b433a2 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -92,8 +92,6 @@ do { \ } \ } -#define DLM_FAKE_USER_AST ERR_PTR(-EINVAL) - struct dlm_direntry { struct list_head list; @@ -146,9 +144,9 @@ struct dlm_recover { struct dlm_args { uint32_t flags; - void *astaddr; - long astparam; - void *bastaddr; + void (*astfn) (void *astparam); + void *astparam; + void (*bastfn) (void *astparam, int mode); int mode; struct dlm_lksb *lksb; unsigned long timeout; @@ -253,9 +251,12 @@ struct dlm_lkb { char *lkb_lvbptr; struct dlm_lksb *lkb_lksb; /* caller's status block */ - void *lkb_astaddr; /* caller's ast function */ - void *lkb_bastaddr; /* caller's bast function */ - long lkb_astparam; /* caller's ast arg */ + void (*lkb_astfn) (void *astparam); + void (*lkb_bastfn) (void *astparam, int mode); + union { + void *lkb_astparam; /* caller's ast arg */ + struct dlm_user_args *lkb_ua; + }; }; @@ -403,28 +404,34 @@ struct dlm_rcom { char rc_buf[0]; }; +union dlm_packet { + struct dlm_header header; /* common to other two */ + struct dlm_message message; + struct dlm_rcom rcom; +}; + 
struct rcom_config { - uint32_t rf_lvblen; - uint32_t rf_lsflags; - uint64_t rf_unused; + __le32 rf_lvblen; + __le32 rf_lsflags; + __le64 rf_unused; }; struct rcom_lock { - uint32_t rl_ownpid; - uint32_t rl_lkid; - uint32_t rl_remid; - uint32_t rl_parent_lkid; - uint32_t rl_parent_remid; - uint32_t rl_exflags; - uint32_t rl_flags; - uint32_t rl_lvbseq; - int rl_result; + __le32 rl_ownpid; + __le32 rl_lkid; + __le32 rl_remid; + __le32 rl_parent_lkid; + __le32 rl_parent_remid; + __le32 rl_exflags; + __le32 rl_flags; + __le32 rl_lvbseq; + __le32 rl_result; int8_t rl_rqmode; int8_t rl_grmode; int8_t rl_status; int8_t rl_asts; - uint16_t rl_wait_type; - uint16_t rl_namelen; + __le16 rl_wait_type; + __le16 rl_namelen; char rl_name[DLM_RESNAME_MAXLEN]; char rl_lvb[0]; }; @@ -494,7 +501,7 @@ struct dlm_ls { struct rw_semaphore ls_recv_active; /* block dlm_recv */ struct list_head ls_requestqueue;/* queue remote requests */ struct mutex ls_requestqueue_mutex; - char *ls_recover_buf; + struct dlm_rcom *ls_recover_buf; int ls_recover_nodeid; /* for debugging */ uint64_t ls_rcom_seq; spinlock_t ls_rcom_spin; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index ff4a198fa677..8f250ac8b928 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -436,11 +436,15 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, { struct dlm_rsb *r, *tmp; uint32_t hash, bucket; - int error = 0; + int error = -EINVAL; + + if (namelen > DLM_RESNAME_MAXLEN) + goto out; if (dlm_no_directory(ls)) flags |= R_CREATE; + error = 0; hash = jhash(name, namelen, 0); bucket = hash & (ls->ls_rsbtbl_size - 1); @@ -1222,6 +1226,8 @@ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; if (b == 1) { int len = receive_extralen(ms); + if (len > DLM_RESNAME_MAXLEN) + len = DLM_RESNAME_MAXLEN; memcpy(lkb->lkb_lvbptr, ms->m_extra, len); lkb->lkb_lvbseq = ms->m_lvbseq; } @@ -1775,7 +1781,7 @@ static void grant_pending_locks(struct 
dlm_rsb *r) */ list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { - if (lkb->lkb_bastaddr && lock_requires_bast(lkb, high, cw)) { + if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) { if (cw && high == DLM_LOCK_PR) queue_bast(r, lkb, DLM_LOCK_CW); else @@ -1805,7 +1811,7 @@ static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, struct dlm_lkb *gr; list_for_each_entry(gr, head, lkb_statequeue) { - if (gr->lkb_bastaddr && modes_require_bast(gr, lkb)) { + if (gr->lkb_bastfn && modes_require_bast(gr, lkb)) { queue_bast(r, gr, lkb->lkb_rqmode); gr->lkb_highbast = lkb->lkb_rqmode; } @@ -1960,8 +1966,11 @@ static void confirm_master(struct dlm_rsb *r, int error) } static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, - int namelen, unsigned long timeout_cs, void *ast, - void *astarg, void *bast, struct dlm_args *args) + int namelen, unsigned long timeout_cs, + void (*ast) (void *astparam), + void *astparam, + void (*bast) (void *astparam, int mode), + struct dlm_args *args) { int rv = -EINVAL; @@ -2011,9 +2020,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, an active lkb cannot be modified before locking the rsb */ args->flags = flags; - args->astaddr = ast; - args->astparam = (long) astarg; - args->bastaddr = bast; + args->astfn = ast; + args->astparam = astparam; + args->bastfn = bast; args->timeout = timeout_cs; args->mode = mode; args->lksb = lksb; @@ -2032,7 +2041,7 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) return -EINVAL; args->flags = flags; - args->astparam = (long) astarg; + args->astparam = astarg; return 0; } @@ -2062,9 +2071,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_exflags = args->flags; lkb->lkb_sbflags = 0; - lkb->lkb_astaddr = args->astaddr; + lkb->lkb_astfn = args->astfn; lkb->lkb_astparam = args->astparam; - lkb->lkb_bastaddr = args->bastaddr; + lkb->lkb_bastfn = args->bastfn; 
lkb->lkb_rqmode = args->mode; lkb->lkb_lksb = args->lksb; lkb->lkb_lvbptr = args->lksb->sb_lvbptr; @@ -2711,9 +2720,9 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, /* m_result and m_bastmode are set from function args, not from lkb fields */ - if (lkb->lkb_bastaddr) + if (lkb->lkb_bastfn) ms->m_asts |= AST_BAST; - if (lkb->lkb_astaddr) + if (lkb->lkb_astfn) ms->m_asts |= AST_COMP; /* compare with switch in create_message; send_remove() doesn't @@ -2989,11 +2998,23 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, if (!lkb->lkb_lvbptr) return -ENOMEM; len = receive_extralen(ms); + if (len > DLM_RESNAME_MAXLEN) + len = DLM_RESNAME_MAXLEN; memcpy(lkb->lkb_lvbptr, ms->m_extra, len); } return 0; } +static void fake_bastfn(void *astparam, int mode) +{ + log_print("fake_bastfn should not be called"); +} + +static void fake_astfn(void *astparam) +{ + log_print("fake_astfn should not be called"); +} + static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_message *ms) { @@ -3002,8 +3023,9 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_remid = ms->m_lkid; lkb->lkb_grmode = DLM_LOCK_IV; lkb->lkb_rqmode = ms->m_rqmode; - lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); - lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); + + lkb->lkb_bastfn = (ms->m_asts & AST_BAST) ? &fake_bastfn : NULL; + lkb->lkb_astfn = (ms->m_asts & AST_COMP) ? 
&fake_astfn : NULL; if (lkb->lkb_exflags & DLM_LKF_VALBLK) { /* lkb was just created so there won't be an lvb yet */ @@ -3802,7 +3824,7 @@ static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) { if (dlm_locking_stopped(ls)) { - dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms); + dlm_add_requestqueue(ls, nodeid, ms); } else { dlm_wait_requestqueue(ls); _receive_message(ls, ms); @@ -3822,21 +3844,20 @@ void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms) standard locking activity) or an RCOM (recovery message sent as part of lockspace recovery). */ -void dlm_receive_buffer(struct dlm_header *hd, int nodeid) +void dlm_receive_buffer(union dlm_packet *p, int nodeid) { - struct dlm_message *ms = (struct dlm_message *) hd; - struct dlm_rcom *rc = (struct dlm_rcom *) hd; + struct dlm_header *hd = &p->header; struct dlm_ls *ls; int type = 0; switch (hd->h_cmd) { case DLM_MSG: - dlm_message_in(ms); - type = ms->m_type; + dlm_message_in(&p->message); + type = p->message.m_type; break; case DLM_RCOM: - dlm_rcom_in(rc); - type = rc->rc_type; + dlm_rcom_in(&p->rcom); + type = p->rcom.rc_type; break; default: log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid); @@ -3856,7 +3877,7 @@ void dlm_receive_buffer(struct dlm_header *hd, int nodeid) hd->h_lockspace, nodeid, hd->h_cmd, type); if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) - dlm_send_ls_not_ready(nodeid, rc); + dlm_send_ls_not_ready(nodeid, &p->rcom); return; } @@ -3865,9 +3886,9 @@ void dlm_receive_buffer(struct dlm_header *hd, int nodeid) down_read(&ls->ls_recv_active); if (hd->h_cmd == DLM_MSG) - dlm_receive_message(ls, ms, nodeid); + dlm_receive_message(ls, &p->message, nodeid); else - dlm_receive_rcom(ls, rc, nodeid); + dlm_receive_rcom(ls, &p->rcom, nodeid); up_read(&ls->ls_recv_active); dlm_put_lockspace(ls); @@ -4267,32 +4288,34 @@ static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid, return NULL; } +/* needs at least dlm_rcom 
+ rcom_lock */ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_rsb *r, struct dlm_rcom *rc) { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; - int lvblen; lkb->lkb_nodeid = rc->rc_header.h_nodeid; - lkb->lkb_ownpid = rl->rl_ownpid; - lkb->lkb_remid = rl->rl_lkid; - lkb->lkb_exflags = rl->rl_exflags; - lkb->lkb_flags = rl->rl_flags & 0x0000FFFF; + lkb->lkb_ownpid = le32_to_cpu(rl->rl_ownpid); + lkb->lkb_remid = le32_to_cpu(rl->rl_lkid); + lkb->lkb_exflags = le32_to_cpu(rl->rl_exflags); + lkb->lkb_flags = le32_to_cpu(rl->rl_flags) & 0x0000FFFF; lkb->lkb_flags |= DLM_IFL_MSTCPY; - lkb->lkb_lvbseq = rl->rl_lvbseq; + lkb->lkb_lvbseq = le32_to_cpu(rl->rl_lvbseq); lkb->lkb_rqmode = rl->rl_rqmode; lkb->lkb_grmode = rl->rl_grmode; /* don't set lkb_status because add_lkb wants to itself */ - lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST); - lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); + lkb->lkb_bastfn = (rl->rl_asts & AST_BAST) ? &fake_bastfn : NULL; + lkb->lkb_astfn = (rl->rl_asts & AST_COMP) ? 
&fake_astfn : NULL; if (lkb->lkb_exflags & DLM_LKF_VALBLK) { + int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - + sizeof(struct rcom_lock); + if (lvblen > ls->ls_lvblen) + return -EINVAL; lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; - lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - - sizeof(struct rcom_lock); memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen); } @@ -4300,7 +4323,8 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, The real granted mode of these converting locks cannot be determined until all locks have been rebuilt on the rsb (recover_conversion) */ - if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) { + if (rl->rl_wait_type == cpu_to_le16(DLM_MSG_CONVERT) && + middle_conversion(lkb)) { rl->rl_status = DLM_LKSTS_CONVERT; lkb->lkb_grmode = DLM_LOCK_IV; rsb_set_flag(r, RSB_RECOVER_CONVERT); @@ -4315,6 +4339,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, the given values and send back our lkid. We send back our lkid by sending back the rcom_lock struct we got but with the remid field filled in. 
*/ +/* needs at least dlm_rcom + rcom_lock */ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; @@ -4327,13 +4352,14 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) goto out; } - error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r); + error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen), + R_MASTER, &r); if (error) goto out; lock_rsb(r); - lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid); + lkb = search_remid(r, rc->rc_header.h_nodeid, le32_to_cpu(rl->rl_lkid)); if (lkb) { error = -EEXIST; goto out_remid; @@ -4356,18 +4382,20 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) out_remid: /* this is the new value returned to the lock holder for saving in its process-copy lkb */ - rl->rl_remid = lkb->lkb_id; + rl->rl_remid = cpu_to_le32(lkb->lkb_id); out_unlock: unlock_rsb(r); put_rsb(r); out: if (error) - log_debug(ls, "recover_master_copy %d %x", error, rl->rl_lkid); - rl->rl_result = error; + log_debug(ls, "recover_master_copy %d %x", error, + le32_to_cpu(rl->rl_lkid)); + rl->rl_result = cpu_to_le32(error); return error; } +/* needs at least dlm_rcom + rcom_lock */ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; @@ -4375,15 +4403,16 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) struct dlm_lkb *lkb; int error; - error = find_lkb(ls, rl->rl_lkid, &lkb); + error = find_lkb(ls, le32_to_cpu(rl->rl_lkid), &lkb); if (error) { - log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid); + log_error(ls, "recover_process_copy no lkid %x", + le32_to_cpu(rl->rl_lkid)); return error; } DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); - error = rl->rl_result; + error = le32_to_cpu(rl->rl_result); r = lkb->lkb_resource; hold_rsb(r); @@ -4402,7 +4431,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom 
*rc) log_debug(ls, "master copy exists %x", lkb->lkb_id); /* fall through */ case 0: - lkb->lkb_remid = rl->rl_remid; + lkb->lkb_remid = le32_to_cpu(rl->rl_remid); break; default: log_error(ls, "dlm_recover_process_copy unknown error %d %x", @@ -4451,7 +4480,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, lock and that lkb_astparam is the dlm_user_args structure. */ error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs, - DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); + fake_astfn, ua, fake_bastfn, &args); lkb->lkb_flags |= DLM_IFL_USER; ua->old_mode = DLM_LOCK_IV; @@ -4504,7 +4533,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, /* user can change the params on its lock when it converts it, or add an lvb that didn't exist before */ - ua = (struct dlm_user_args *)lkb->lkb_astparam; + ua = lkb->lkb_ua; if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); @@ -4525,7 +4554,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua->old_mode = lkb->lkb_grmode; error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs, - DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); + fake_astfn, ua, fake_bastfn, &args); if (error) goto out_put; @@ -4555,7 +4584,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; - ua = (struct dlm_user_args *)lkb->lkb_astparam; + ua = lkb->lkb_ua; if (lvb_in && ua->lksb.sb_lvbptr) memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); @@ -4604,7 +4633,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; - ua = (struct dlm_user_args *)lkb->lkb_astparam; + ua = lkb->lkb_ua; if (ua_tmp->castparam) ua->castparam = ua_tmp->castparam; ua->user_lksb = ua_tmp->user_lksb; @@ -4642,7 +4671,7 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) if (error) goto out; - ua = (struct dlm_user_args 
*)lkb->lkb_astparam; + ua = lkb->lkb_ua; error = set_unlock_args(flags, ua, &args); if (error) @@ -4681,7 +4710,6 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) { - struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; struct dlm_args args; int error; @@ -4690,7 +4718,7 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans); mutex_unlock(&ls->ls_orphans_mutex); - set_unlock_args(0, ua, &args); + set_unlock_args(0, lkb->lkb_ua, &args); error = cancel_lock(ls, lkb, &args); if (error == -DLM_ECANCEL) @@ -4703,11 +4731,10 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) { - struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; struct dlm_args args; int error; - set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args); + set_unlock_args(DLM_LKF_FORCEUNLOCK, lkb->lkb_ua, &args); error = unlock_lock(ls, lkb, &args); if (error == -DLM_EUNLOCK) diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 27b6ed302911..05d9c82e646b 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -17,7 +17,7 @@ void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); -void dlm_receive_buffer(struct dlm_header *hd, int nodeid); +void dlm_receive_buffer(union dlm_packet *p, int nodeid); int dlm_modes_compat(int mode1, int mode2); void dlm_put_rsb(struct dlm_rsb *r); void dlm_hold_rsb(struct dlm_rsb *r); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index b180fdc51085..b64e55e0515d 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -191,7 +191,7 @@ static int do_uevent(struct dlm_ls *ls, int in) } -int dlm_lockspace_init(void) +int __init dlm_lockspace_init(void) { ls_count = 0; 
mutex_init(&ls_lock); diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index f7783867491a..54c14c6d06cb 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -18,7 +18,7 @@ static struct kmem_cache *lkb_cache; -int dlm_memory_init(void) +int __init dlm_memory_init(void) { int ret = 0; @@ -80,7 +80,7 @@ void dlm_free_lkb(struct dlm_lkb *lkb) { if (lkb->lkb_flags & DLM_IFL_USER) { struct dlm_user_args *ua; - ua = (struct dlm_user_args *)lkb->lkb_astparam; + ua = lkb->lkb_ua; if (ua) { if (ua->lksb.sb_lvbptr) kfree(ua->lksb.sb_lvbptr); diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index e69926e984db..07ac709f3ed7 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -61,9 +61,9 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, union { unsigned char __buf[DLM_INBUF_LEN]; /* this is to force proper alignment on some arches */ - struct dlm_header dlm; + union dlm_packet p; } __tmp; - struct dlm_header *msg = &__tmp.dlm; + union dlm_packet *p = &__tmp.p; int ret = 0; int err = 0; uint16_t msglen; @@ -75,15 +75,22 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, message may wrap around the end of the buffer back to the start, so we need to use a temp buffer and copy_from_cb. */ - copy_from_cb(msg, base, offset, sizeof(struct dlm_header), + copy_from_cb(p, base, offset, sizeof(struct dlm_header), limit); - msglen = le16_to_cpu(msg->h_length); - lockspace = msg->h_lockspace; + msglen = le16_to_cpu(p->header.h_length); + lockspace = p->header.h_lockspace; err = -EINVAL; if (msglen < sizeof(struct dlm_header)) break; + if (p->header.h_cmd == DLM_MSG) { + if (msglen < sizeof(struct dlm_message)) + break; + } else { + if (msglen < sizeof(struct dlm_rcom)) + break; + } err = -E2BIG; if (msglen > dlm_config.ci_buffer_size) { log_print("message size %d from %d too big, buf len %d", @@ -104,26 +111,26 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, in the buffer on the stack (which should work for most ordinary messages). 
*/ - if (msglen > DLM_INBUF_LEN && msg == &__tmp.dlm) { - msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); - if (msg == NULL) + if (msglen > sizeof(__tmp) && p == &__tmp.p) { + p = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); + if (p == NULL) return ret; } - copy_from_cb(msg, base, offset, msglen, limit); + copy_from_cb(p, base, offset, msglen, limit); - BUG_ON(lockspace != msg->h_lockspace); + BUG_ON(lockspace != p->header.h_lockspace); ret += msglen; offset += msglen; offset &= (limit - 1); len -= msglen; - dlm_receive_buffer(msg, nodeid); + dlm_receive_buffer(p, nodeid); } - if (msg != &__tmp.dlm) - kfree(msg); + if (p != &__tmp.p) + kfree(p); return err ? err : ret; } diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 863b87d0dc71..714593621f4f 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -78,7 +78,7 @@ static struct genl_ops dlm_nl_ops = { .doit = user_cmd, }; -int dlm_netlink_init(void) +int __init dlm_netlink_init(void) { int rv; @@ -95,7 +95,7 @@ int dlm_netlink_init(void) return rv; } -void dlm_netlink_exit(void) +void __exit dlm_netlink_exit(void) { genl_unregister_ops(&family, &dlm_nl_ops); genl_unregister_family(&family); @@ -104,7 +104,6 @@ void dlm_netlink_exit(void) static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb) { struct dlm_rsb *r = lkb->lkb_resource; - struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam; memset(data, 0, sizeof(struct dlm_lock_data)); @@ -117,8 +116,8 @@ static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb) data->grmode = lkb->lkb_grmode; data->rqmode = lkb->lkb_rqmode; data->timestamp = lkb->lkb_timestamp; - if (ua) - data->xid = ua->xid; + if (lkb->lkb_ua) + data->xid = lkb->lkb_ua->xid; if (r) { data->lockspace_id = r->res_ls->ls_global_id; data->resource_namelen = r->res_length; diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 026824cd3acb..035e6f9990b0 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -78,13 +78,14 @@ static void send_rcom(struct 
dlm_ls *ls, struct dlm_mhandle *mh, static void make_config(struct dlm_ls *ls, struct rcom_config *rf) { - rf->rf_lvblen = ls->ls_lvblen; - rf->rf_lsflags = ls->ls_exflags; + rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); + rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); } static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; + size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { log_error(ls, "version mismatch: %x nodeid %d: %x", @@ -93,11 +94,18 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) return -EPROTO; } - if (rf->rf_lvblen != ls->ls_lvblen || - rf->rf_lsflags != ls->ls_exflags) { + if (rc->rc_header.h_length < conf_size) { + log_error(ls, "config too short: %d nodeid %d", + rc->rc_header.h_length, nodeid); + return -EPROTO; + } + + if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || + le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", - ls->ls_lvblen, ls->ls_exflags, - nodeid, rf->rf_lvblen, rf->rf_lsflags); + ls->ls_lvblen, ls->ls_exflags, nodeid, + le32_to_cpu(rf->rf_lvblen), + le32_to_cpu(rf->rf_lsflags)); return -EPROTO; } return 0; @@ -128,7 +136,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) ls->ls_recover_nodeid = nodeid; if (nodeid == dlm_our_nodeid()) { - rc = (struct dlm_rcom *) ls->ls_recover_buf; + rc = ls->ls_recover_buf; rc->rc_result = dlm_recover_status(ls); goto out; } @@ -147,7 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) if (error) goto out; - rc = (struct dlm_rcom *) ls->ls_recover_buf; + rc = ls->ls_recover_buf; if (rc->rc_result == -ESRCH) { /* we pretend the remote lockspace exists with 0 status */ @@ -201,14 +209,15 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) { struct dlm_rcom *rc; struct dlm_mhandle *mh; - int error = 0, 
len = sizeof(struct dlm_rcom); + int error = 0; + int max_size = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); ls->ls_recover_nodeid = nodeid; if (nodeid == dlm_our_nodeid()) { dlm_copy_master_names(ls, last_name, last_len, - ls->ls_recover_buf + len, - dlm_config.ci_buffer_size - len, nodeid); + ls->ls_recover_buf->rc_buf, + max_size, nodeid); goto out; } @@ -299,22 +308,22 @@ static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb, { memset(rl, 0, sizeof(*rl)); - rl->rl_ownpid = lkb->lkb_ownpid; - rl->rl_lkid = lkb->lkb_id; - rl->rl_exflags = lkb->lkb_exflags; - rl->rl_flags = lkb->lkb_flags; - rl->rl_lvbseq = lkb->lkb_lvbseq; + rl->rl_ownpid = cpu_to_le32(lkb->lkb_ownpid); + rl->rl_lkid = cpu_to_le32(lkb->lkb_id); + rl->rl_exflags = cpu_to_le32(lkb->lkb_exflags); + rl->rl_flags = cpu_to_le32(lkb->lkb_flags); + rl->rl_lvbseq = cpu_to_le32(lkb->lkb_lvbseq); rl->rl_rqmode = lkb->lkb_rqmode; rl->rl_grmode = lkb->lkb_grmode; rl->rl_status = lkb->lkb_status; - rl->rl_wait_type = lkb->lkb_wait_type; + rl->rl_wait_type = cpu_to_le16(lkb->lkb_wait_type); - if (lkb->lkb_bastaddr) + if (lkb->lkb_bastfn) rl->rl_asts |= AST_BAST; - if (lkb->lkb_astaddr) + if (lkb->lkb_astfn) rl->rl_asts |= AST_COMP; - rl->rl_namelen = r->res_length; + rl->rl_namelen = cpu_to_le16(r->res_length); memcpy(rl->rl_name, r->res_name, r->res_length); /* FIXME: might we have an lvb without DLM_LKF_VALBLK set ? 
@@ -348,6 +357,7 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) return error; } +/* needs at least dlm_rcom + rcom_lock */ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; @@ -401,7 +411,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) rc->rc_result = -ESRCH; rf = (struct rcom_config *) rc->rc_buf; - rf->rf_lvblen = -1; + rf->rf_lvblen = cpu_to_le32(~0U); dlm_rcom_out(rc); dlm_lowcomms_commit_buffer(mh); @@ -439,6 +449,8 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { + int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); + if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { log_debug(ls, "ignoring recovery message %x from %d", rc->rc_type, nodeid); @@ -462,6 +474,8 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) break; case DLM_RCOM_LOCK: + if (rc->rc_header.h_length < lock_size) + goto Eshort; receive_rcom_lock(ls, rc); break; @@ -478,13 +492,18 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) break; case DLM_RCOM_LOCK_REPLY: + if (rc->rc_header.h_length < lock_size) + goto Eshort; dlm_recover_process_copy(ls, rc); break; default: log_error(ls, "receive_rcom bad type %d", rc->rc_type); } - out: +out: return; +Eshort: + log_error(ls, "recovery message %x from %d is too short", + rc->rc_type, nodeid); } diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index df075dc300fa..80aba5bdd4a4 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -94,7 +94,7 @@ void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) { - struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf; + struct dlm_rcom *rc = ls->ls_recover_buf; struct dlm_member *memb; int error = 0, delay; @@ -123,7 +123,7 @@ static int wait_status_all(struct dlm_ls *ls, 
uint32_t wait_status) static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) { - struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf; + struct dlm_rcom *rc = ls->ls_recover_buf; int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; for (;;) { diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 0de04f17ccea..daa4183fbb84 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -20,7 +20,7 @@ struct rq_entry { struct list_head list; int nodeid; - char request[0]; + struct dlm_message request; }; /* @@ -30,10 +30,10 @@ struct rq_entry { * lockspace is enabled on some while still suspended on others. */ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) { struct rq_entry *e; - int length = hd->h_length; + int length = ms->m_header.h_length - sizeof(struct dlm_message); e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); if (!e) { @@ -42,7 +42,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) } e->nodeid = nodeid; - memcpy(e->request, hd, length); + memcpy(&e->request, ms, ms->m_header.h_length); mutex_lock(&ls->ls_requestqueue_mutex); list_add_tail(&e->list, &ls->ls_requestqueue); @@ -76,7 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls) e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); mutex_unlock(&ls->ls_requestqueue_mutex); - dlm_receive_message_saved(ls, (struct dlm_message *)e->request); + dlm_receive_message_saved(ls, &e->request); mutex_lock(&ls->ls_requestqueue_mutex); list_del(&e->list); @@ -176,7 +176,7 @@ void dlm_purge_requestqueue(struct dlm_ls *ls) mutex_lock(&ls->ls_requestqueue_mutex); list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) { - ms = (struct dlm_message *) e->request; + ms = &e->request; if (purge_request(ls, ms, e->nodeid)) { list_del(&e->list); diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 
aba34fc05ee4..10ce449b77da 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -13,7 +13,7 @@ #ifndef __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms); int dlm_process_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls); diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 7cbc6826239b..ebbcf38fd33b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -82,7 +82,7 @@ struct dlm_lock_result32 { static void compat_input(struct dlm_write_request *kb, struct dlm_write_request32 *kb32, - int max_namelen) + size_t count) { kb->version[0] = kb32->version[0]; kb->version[1] = kb32->version[1]; @@ -94,7 +94,8 @@ static void compat_input(struct dlm_write_request *kb, kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { kb->i.lspace.flags = kb32->i.lspace.flags; kb->i.lspace.minor = kb32->i.lspace.minor; - strcpy(kb->i.lspace.name, kb32->i.lspace.name); + memcpy(kb->i.lspace.name, kb32->i.lspace.name, count - + offsetof(struct dlm_write_request32, i.lspace.name)); } else if (kb->cmd == DLM_USER_PURGE) { kb->i.purge.nodeid = kb32->i.purge.nodeid; kb->i.purge.pid = kb32->i.purge.pid; @@ -112,11 +113,8 @@ static void compat_input(struct dlm_write_request *kb, kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); - if (kb->i.lock.namelen <= max_namelen) - memcpy(kb->i.lock.name, kb32->i.lock.name, - kb->i.lock.namelen); - else - kb->i.lock.namelen = max_namelen; + memcpy(kb->i.lock.name, kb32->i.lock.name, count - + offsetof(struct dlm_write_request32, i.lock.name)); } } @@ -197,8 +195,8 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) goto out; - 
DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); - ua = (struct dlm_user_args *)lkb->lkb_astparam; + DLM_ASSERT(lkb->lkb_ua, dlm_print_lkb(lkb);); + ua = lkb->lkb_ua; proc = ua->proc; if (type == AST_BAST && ua->bastaddr == NULL) @@ -508,7 +506,7 @@ static ssize_t device_write(struct file *file, const char __user *buf, #endif return -EINVAL; - kbuf = kmalloc(count, GFP_KERNEL); + kbuf = kzalloc(count + 1, GFP_KERNEL); if (!kbuf) return -ENOMEM; @@ -526,15 +524,14 @@ static ssize_t device_write(struct file *file, const char __user *buf, if (!kbuf->is64bit) { struct dlm_write_request32 *k32buf; k32buf = (struct dlm_write_request32 *)kbuf; - kbuf = kmalloc(count + (sizeof(struct dlm_write_request) - + kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - sizeof(struct dlm_write_request32)), GFP_KERNEL); if (!kbuf) return -ENOMEM; if (proc) set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); - compat_input(kbuf, k32buf, - count - sizeof(struct dlm_write_request32)); + compat_input(kbuf, k32buf, count + 1); kfree(k32buf); } #endif @@ -774,7 +771,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, { struct dlm_user_proc *proc = file->private_data; struct dlm_lkb *lkb; - struct dlm_user_args *ua; DECLARE_WAITQUEUE(wait, current); int error, type=0, bmode=0, removed = 0; @@ -845,8 +841,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, } spin_unlock(&proc->asts_spin); - ua = (struct dlm_user_args *)lkb->lkb_astparam; - error = copy_result_to_user(ua, + error = copy_result_to_user(lkb->lkb_ua, test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags), type, bmode, buf, count); @@ -907,7 +902,7 @@ static struct miscdevice ctl_device = { .minor = MISC_DYNAMIC_MINOR, }; -int dlm_user_init(void) +int __init dlm_user_init(void) { int error; diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 4d9c1f4e1bd1..e36520af7cc0 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c @@ -131,52 +131,8 @@ void dlm_message_in(struct dlm_message 
*ms) ms->m_result = from_dlm_errno(le32_to_cpu(ms->m_result)); } -static void rcom_lock_out(struct rcom_lock *rl) -{ - rl->rl_ownpid = cpu_to_le32(rl->rl_ownpid); - rl->rl_lkid = cpu_to_le32(rl->rl_lkid); - rl->rl_remid = cpu_to_le32(rl->rl_remid); - rl->rl_parent_lkid = cpu_to_le32(rl->rl_parent_lkid); - rl->rl_parent_remid = cpu_to_le32(rl->rl_parent_remid); - rl->rl_exflags = cpu_to_le32(rl->rl_exflags); - rl->rl_flags = cpu_to_le32(rl->rl_flags); - rl->rl_lvbseq = cpu_to_le32(rl->rl_lvbseq); - rl->rl_result = cpu_to_le32(rl->rl_result); - rl->rl_wait_type = cpu_to_le16(rl->rl_wait_type); - rl->rl_namelen = cpu_to_le16(rl->rl_namelen); -} - -static void rcom_lock_in(struct rcom_lock *rl) -{ - rl->rl_ownpid = le32_to_cpu(rl->rl_ownpid); - rl->rl_lkid = le32_to_cpu(rl->rl_lkid); - rl->rl_remid = le32_to_cpu(rl->rl_remid); - rl->rl_parent_lkid = le32_to_cpu(rl->rl_parent_lkid); - rl->rl_parent_remid = le32_to_cpu(rl->rl_parent_remid); - rl->rl_exflags = le32_to_cpu(rl->rl_exflags); - rl->rl_flags = le32_to_cpu(rl->rl_flags); - rl->rl_lvbseq = le32_to_cpu(rl->rl_lvbseq); - rl->rl_result = le32_to_cpu(rl->rl_result); - rl->rl_wait_type = le16_to_cpu(rl->rl_wait_type); - rl->rl_namelen = le16_to_cpu(rl->rl_namelen); -} - -static void rcom_config_out(struct rcom_config *rf) -{ - rf->rf_lvblen = cpu_to_le32(rf->rf_lvblen); - rf->rf_lsflags = cpu_to_le32(rf->rf_lsflags); -} - -static void rcom_config_in(struct rcom_config *rf) -{ - rf->rf_lvblen = le32_to_cpu(rf->rf_lvblen); - rf->rf_lsflags = le32_to_cpu(rf->rf_lsflags); -} - void dlm_rcom_out(struct dlm_rcom *rc) { - int type = rc->rc_type; - header_out(&rc->rc_header); rc->rc_type = cpu_to_le32(rc->rc_type); @@ -184,18 +140,10 @@ void dlm_rcom_out(struct dlm_rcom *rc) rc->rc_id = cpu_to_le64(rc->rc_id); rc->rc_seq = cpu_to_le64(rc->rc_seq); rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); - - if ((type == DLM_RCOM_LOCK) || (type == DLM_RCOM_LOCK_REPLY)) - rcom_lock_out((struct rcom_lock *) rc->rc_buf); - - else if 
(type == DLM_RCOM_STATUS_REPLY) - rcom_config_out((struct rcom_config *) rc->rc_buf); } void dlm_rcom_in(struct dlm_rcom *rc) { - int type; - header_in(&rc->rc_header); rc->rc_type = le32_to_cpu(rc->rc_type); @@ -203,13 +151,4 @@ void dlm_rcom_in(struct dlm_rcom *rc) rc->rc_id = le64_to_cpu(rc->rc_id); rc->rc_seq = le64_to_cpu(rc->rc_seq); rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); - - type = rc->rc_type; - - if ((type == DLM_RCOM_LOCK) || (type == DLM_RCOM_LOCK_REPLY)) - rcom_lock_in((struct rcom_lock *) rc->rc_buf); - - else if (type == DLM_RCOM_STATUS_REPLY) - rcom_config_in((struct rcom_config *) rc->rc_buf); } - diff --git a/fs/dquot.c b/fs/dquot.c index cee7c6f428f0..9c7feb62eed1 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -696,9 +696,8 @@ static int dqinit_needed(struct inode *inode, int type) /* This routine is guarded by dqonoff_mutex mutex */ static void add_dquot_ref(struct super_block *sb, int type) { - struct inode *inode; + struct inode *inode, *old_inode = NULL; -restart: spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (!atomic_read(&inode->i_writecount)) @@ -711,12 +710,18 @@ restart: __iget(inode); spin_unlock(&inode_lock); + iput(old_inode); sb->dq_op->initialize(inode, type); - iput(inode); - /* As we may have blocked we had better restart... */ - goto restart; + /* We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the inode_lock. + * We cannot iput the inode now as we can be holding the last + * reference and we cannot iput it under inode_lock. So we + * keep the reference and iput it later. 
*/ + old_inode = inode; + spin_lock(&inode_lock); } spin_unlock(&inode_lock); + iput(old_inode); } /* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */ @@ -1628,16 +1633,17 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) error = path_lookup(path, LOOKUP_FOLLOW, &nd); if (error < 0) return error; - error = security_quota_on(nd.dentry); + error = security_quota_on(nd.path.dentry); if (error) goto out_path; /* Quota file not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) + if (nd.path.mnt->mnt_sb != sb) error = -EXDEV; else - error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); + error = vfs_quota_on_inode(nd.path.dentry->d_inode, type, + format_id); out_path: - path_release(&nd); + path_put(&nd.path); return error; } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f8ef0af919e7..a066e109ad9c 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -355,8 +355,11 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, } /* Consider doing this once, when the file is opened */ mutex_lock(&crypt_stat->cs_tfm_mutex); - rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); + if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { + rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); + crypt_stat->flags |= ECRYPTFS_KEY_SET; + } if (rc) { ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", rc); @@ -376,11 +379,10 @@ out: * * Convert an eCryptfs page index into a lower byte offset */ -void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, - struct ecryptfs_crypt_stat *crypt_stat) +static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, + struct ecryptfs_crypt_stat *crypt_stat) { - (*offset) = ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) + (*offset) = (crypt_stat->num_header_bytes_at_front + (crypt_stat->extent_size * 
extent_num)); } @@ -842,15 +844,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) set_extent_mask_and_shift(crypt_stat); crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - crypt_stat->num_header_extents_at_front = 0; + crypt_stat->num_header_bytes_at_front = 0; else { if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) - crypt_stat->num_header_extents_at_front = - (ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE - / crypt_stat->extent_size); + crypt_stat->num_header_bytes_at_front = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else - crypt_stat->num_header_extents_at_front = - (PAGE_CACHE_SIZE / crypt_stat->extent_size); + crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; } } @@ -1128,7 +1128,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_cipher_code_str_map_elem { char cipher_str[16]; - u16 cipher_code; + u8 cipher_code; }; /* Add support for additional ciphers by adding elements here. 
The @@ -1152,10 +1152,10 @@ ecryptfs_cipher_code_str_map[] = { * * Returns zero on no match, or the cipher code on match */ -u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) +u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) { int i; - u16 code = 0; + u8 code = 0; struct ecryptfs_cipher_code_str_map_elem *map = ecryptfs_cipher_code_str_map; @@ -1187,7 +1187,7 @@ u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) * * Returns zero on success */ -int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code) +int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code) { int rc = 0; int i; @@ -1236,7 +1236,8 @@ ecryptfs_write_header_metadata(char *virt, header_extent_size = (u32)crypt_stat->extent_size; num_header_extents_at_front = - (u16)crypt_stat->num_header_extents_at_front; + (u16)(crypt_stat->num_header_bytes_at_front + / crypt_stat->extent_size); header_extent_size = cpu_to_be32(header_extent_size); memcpy(virt, &header_extent_size, 4); virt += 4; @@ -1311,40 +1312,16 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, struct dentry *ecryptfs_dentry, - char *page_virt) + char *virt) { - int current_header_page; - int header_pages; int rc; - rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt, - 0, PAGE_CACHE_SIZE); - if (rc) { + rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, + 0, crypt_stat->num_header_bytes_at_front); + if (rc) printk(KERN_ERR "%s: Error attempting to write header " "information to lower file; rc = [%d]\n", __FUNCTION__, rc); - goto out; - } - header_pages = ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) - / PAGE_CACHE_SIZE); - memset(page_virt, 0, PAGE_CACHE_SIZE); - current_header_page = 1; - while (current_header_page < header_pages) { - loff_t offset; - - offset = (((loff_t)current_header_page) << 
PAGE_CACHE_SHIFT); - if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, - page_virt, offset, - PAGE_CACHE_SIZE))) { - printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", - __FUNCTION__, rc); - goto out; - } - current_header_page++; - } -out: return rc; } @@ -1370,15 +1347,13 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, * retrieved via a prompt. Exactly what happens at this point should * be policy-dependent. * - * TODO: Support header information spanning multiple pages - * * Returns zero on success; non-zero on error */ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; - char *page_virt; + char *virt; size_t size = 0; int rc = 0; @@ -1389,40 +1364,39 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) goto out; } } else { + printk(KERN_WARNING "%s: Encrypted flag not set\n", + __FUNCTION__); rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, - "Called with crypt_stat->encrypted == 0\n"); goto out; } /* Released in this function */ - page_virt = kmem_cache_zalloc(ecryptfs_header_cache_0, GFP_USER); - if (!page_virt) { - ecryptfs_printk(KERN_ERR, "Out of memory\n"); + virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); + if (!virt) { + printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); rc = -ENOMEM; goto out; } - rc = ecryptfs_write_headers_virt(page_virt, &size, crypt_stat, - ecryptfs_dentry); + rc = ecryptfs_write_headers_virt(virt, &size, crypt_stat, + ecryptfs_dentry); if (unlikely(rc)) { - ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n"); - memset(page_virt, 0, PAGE_CACHE_SIZE); + printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", + __FUNCTION__, rc); goto out_free; } if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, - crypt_stat, page_virt, - size); + crypt_stat, 
virt, size); else rc = ecryptfs_write_metadata_to_contents(crypt_stat, - ecryptfs_dentry, - page_virt); + ecryptfs_dentry, virt); if (rc) { - printk(KERN_ERR "Error writing metadata out to lower file; " - "rc = [%d]\n", rc); + printk(KERN_ERR "%s: Error writing metadata out to lower file; " + "rc = [%d]\n", __FUNCTION__, rc); goto out_free; } out_free: - kmem_cache_free(ecryptfs_header_cache_0, page_virt); + memset(virt, 0, crypt_stat->num_header_bytes_at_front); + kfree(virt); out: return rc; } @@ -1442,16 +1416,16 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, virt += sizeof(u32); memcpy(&num_header_extents_at_front, virt, sizeof(u16)); num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front); - crypt_stat->num_header_extents_at_front = - (int)num_header_extents_at_front; + crypt_stat->num_header_bytes_at_front = + (((size_t)num_header_extents_at_front + * (size_t)header_extent_size)); (*bytes_read) = (sizeof(u32) + sizeof(u16)); if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) - && ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) + && (crypt_stat->num_header_bytes_at_front < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { rc = -EINVAL; - printk(KERN_WARNING "Invalid number of header extents: [%zd]\n", - crypt_stat->num_header_extents_at_front); + printk(KERN_WARNING "Invalid header size: [%zd]\n", + crypt_stat->num_header_bytes_at_front); } return rc; } @@ -1466,7 +1440,8 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, */ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) { - crypt_stat->num_header_extents_at_front = 2; + crypt_stat->num_header_bytes_at_front = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; } /** @@ -1552,9 +1527,10 @@ int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode) size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME, page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); if (size < 0) { - printk(KERN_ERR 
"Error attempting to read the [%s] " - "xattr from the lower file; return value = [%zd]\n", - ECRYPTFS_XATTR_NAME, size); + if (unlikely(ecryptfs_verbosity > 0)) + printk(KERN_INFO "Error attempting to read the [%s] " + "xattr from the lower file; return value = " + "[%zd]\n", ECRYPTFS_XATTR_NAME, size); rc = -EINVAL; goto out; } @@ -1802,7 +1778,7 @@ out: } struct kmem_cache *ecryptfs_key_tfm_cache; -struct list_head key_tfm_list; +static struct list_head key_tfm_list; struct mutex key_tfm_list_mutex; int ecryptfs_init_crypto(void) @@ -1812,6 +1788,11 @@ int ecryptfs_init_crypto(void) return 0; } +/** + * ecryptfs_destroy_crypto - free all cached key_tfms on key_tfm_list + * + * Called only at module unload time + */ int ecryptfs_destroy_crypto(void) { struct ecryptfs_key_tfm *key_tfm, *key_tfm_tmp; @@ -1835,6 +1816,8 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, struct ecryptfs_key_tfm *tmp_tfm; int rc = 0; + BUG_ON(!mutex_is_locked(&key_tfm_list_mutex)); + tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL); if (key_tfm != NULL) (*key_tfm) = tmp_tfm; @@ -1861,13 +1844,50 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, (*key_tfm) = NULL; goto out; } - mutex_lock(&key_tfm_list_mutex); list_add(&tmp_tfm->key_tfm_list, &key_tfm_list); - mutex_unlock(&key_tfm_list_mutex); out: return rc; } +/** + * ecryptfs_tfm_exists - Search for existing tfm for cipher_name. 
+ * @cipher_name: the name of the cipher to search for + * @key_tfm: set to corresponding tfm if found + * + * Searches for cached key_tfm matching @cipher_name + * Must be called with &key_tfm_list_mutex held + * Returns 1 if found, with @key_tfm set + * Returns 0 if not found, with @key_tfm set to NULL + */ +int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm) +{ + struct ecryptfs_key_tfm *tmp_key_tfm; + + BUG_ON(!mutex_is_locked(&key_tfm_list_mutex)); + + list_for_each_entry(tmp_key_tfm, &key_tfm_list, key_tfm_list) { + if (strcmp(tmp_key_tfm->cipher_name, cipher_name) == 0) { + if (key_tfm) + (*key_tfm) = tmp_key_tfm; + return 1; + } + } + if (key_tfm) + (*key_tfm) = NULL; + return 0; +} + +/** + * ecryptfs_get_tfm_and_mutex_for_cipher_name + * + * @tfm: set to cached tfm found, or new tfm created + * @tfm_mutex: set to mutex for cached tfm found, or new tfm created + * @cipher_name: the name of the cipher to search for and/or add + * + * Sets pointers to @tfm & @tfm_mutex matching @cipher_name. + * Searches for cached item first, and creates new if not found. 
+ * Returns 0 on success, non-zero if adding new cipher failed + */ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, struct mutex **tfm_mutex, char *cipher_name) @@ -1877,22 +1897,17 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, (*tfm) = NULL; (*tfm_mutex) = NULL; + mutex_lock(&key_tfm_list_mutex); - list_for_each_entry(key_tfm, &key_tfm_list, key_tfm_list) { - if (strcmp(key_tfm->cipher_name, cipher_name) == 0) { - (*tfm) = key_tfm->key_tfm; - (*tfm_mutex) = &key_tfm->key_tfm_mutex; - mutex_unlock(&key_tfm_list_mutex); + if (!ecryptfs_tfm_exists(cipher_name, &key_tfm)) { + rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0); + if (rc) { + printk(KERN_ERR "Error adding new key_tfm to list; " + "rc = [%d]\n", rc); goto out; } } mutex_unlock(&key_tfm_list_mutex); - rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0); - if (rc) { - printk(KERN_ERR "Error adding new key_tfm to list; rc = [%d]\n", - rc); - goto out; - } (*tfm) = key_tfm->key_tfm; (*tfm_mutex) = &key_tfm->key_tfm_mutex; out: diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index cb20b964419f..841a032050a7 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -51,13 +51,13 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; - dentry_save = nd->dentry; - vfsmount_save = nd->mnt; - nd->dentry = lower_dentry; - nd->mnt = lower_mnt; + dentry_save = nd->path.dentry; + vfsmount_save = nd->path.mnt; + nd->path.dentry = lower_dentry; + nd->path.mnt = lower_mnt; rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); - nd->dentry = dentry_save; - nd->mnt = vfsmount_save; + nd->path.dentry = dentry_save; + nd->path.mnt = vfsmount_save; if (dentry->d_inode) { struct inode *lower_inode = ecryptfs_inode_to_lower(dentry->d_inode); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 
ce7a5d4aec36..5007f788da01 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -234,10 +234,11 @@ struct ecryptfs_crypt_stat { #define ECRYPTFS_KEY_VALID 0x00000080 #define ECRYPTFS_METADATA_IN_XATTR 0x00000100 #define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 +#define ECRYPTFS_KEY_SET 0x00000400 u32 flags; unsigned int file_version; size_t iv_bytes; - size_t num_header_extents_at_front; + size_t num_header_bytes_at_front; size_t extent_size; /* Data extent size; default is 4096 */ size_t key_size; size_t extent_shift; @@ -322,7 +323,6 @@ struct ecryptfs_key_tfm { unsigned char cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; }; -extern struct list_head key_tfm_list; extern struct mutex key_tfm_list_mutex; /** @@ -521,11 +521,9 @@ extern struct kmem_cache *ecryptfs_file_info_cache; extern struct kmem_cache *ecryptfs_dentry_info_cache; extern struct kmem_cache *ecryptfs_inode_info_cache; extern struct kmem_cache *ecryptfs_sb_info_cache; -extern struct kmem_cache *ecryptfs_header_cache_0; extern struct kmem_cache *ecryptfs_header_cache_1; extern struct kmem_cache *ecryptfs_header_cache_2; extern struct kmem_cache *ecryptfs_xattr_cache; -extern struct kmem_cache *ecryptfs_lower_page_cache; extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; extern struct kmem_cache *ecryptfs_global_auth_tok_cache; @@ -562,8 +560,8 @@ int ecryptfs_read_and_validate_header_region(char *data, struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry); -u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); -int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code); +u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); int ecryptfs_generate_key_packet_set(char 
*dest_base, struct ecryptfs_crypt_stat *crypt_stat, @@ -576,8 +574,6 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); int ecryptfs_inode_set(struct inode *inode, void *lower_inode); void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); -ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size); ssize_t ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, void *value, size_t size); @@ -623,6 +619,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, size_t key_size); int ecryptfs_init_crypto(void); int ecryptfs_destroy_crypto(void); +int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm); int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, struct mutex **tfm_mutex, char *cipher_name); @@ -631,8 +628,6 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, char *sig); int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); -void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, - struct ecryptfs_crypt_stat *crypt_stat); int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size); int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, @@ -646,8 +641,6 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, pgoff_t page_index, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); -int ecryptfs_read(char *data, loff_t offset, size_t size, - struct file *ecryptfs_file); struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index c98c4690a771..2b8f5ed4adea 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -209,9 +209,10 @@ static int ecryptfs_open(struct inode *inode, struct 
file *file) if (!(mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { rc = -EIO; - printk(KERN_WARNING "Attempt to read file that " + printk(KERN_WARNING "Either the lower file " "is not in a valid eCryptfs format, " - "and plaintext passthrough mode is not " + "or the key could not be retrieved. " + "Plaintext passthrough mode is not " "enabled; returning -EIO\n"); mutex_unlock(&crypt_stat->cs_mutex); goto out_free; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5a719180983c..e23861152101 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -77,13 +77,13 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode, struct vfsmount *vfsmount_save; int rc; - dentry_save = nd->dentry; - vfsmount_save = nd->mnt; - nd->dentry = lower_dentry; - nd->mnt = lower_mnt; + dentry_save = nd->path.dentry; + vfsmount_save = nd->path.mnt; + nd->path.dentry = lower_dentry; + nd->path.mnt = lower_mnt; rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); - nd->dentry = dentry_save; - nd->mnt = vfsmount_save; + nd->path.dentry = dentry_save; + nd->path.mnt = vfsmount_save; return rc; } @@ -365,8 +365,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_sb)->mount_crypt_stat; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - file_size = ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) + file_size = (crypt_stat->num_header_bytes_at_front + i_size_read(lower_dentry->d_inode)); else file_size = i_size_read(lower_dentry->d_inode); @@ -685,7 +684,7 @@ ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) * @crypt_stat: Crypt_stat associated with file * @upper_size: Size of the upper file * - * Calculate the requried size of the lower file based on the + * Calculate the required size of the lower file based on the * specified size of the upper file. 
This calculation is based on the * number of headers in the underlying file and the extent size. * @@ -697,8 +696,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, { loff_t lower_size; - lower_size = (crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front); + lower_size = crypt_stat->num_header_bytes_at_front; if (upper_size != 0) { loff_t num_extents; @@ -821,14 +819,14 @@ ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) int rc; if (nd) { - struct vfsmount *vfsmnt_save = nd->mnt; - struct dentry *dentry_save = nd->dentry; + struct vfsmount *vfsmnt_save = nd->path.mnt; + struct dentry *dentry_save = nd->path.dentry; - nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry); - nd->dentry = ecryptfs_dentry_to_lower(nd->dentry); + nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry); + nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry); rc = permission(ecryptfs_inode_to_lower(inode), mask, nd); - nd->mnt = vfsmnt_save; - nd->dentry = dentry_save; + nd->path.mnt = vfsmnt_save; + nd->path.dentry = dentry_save; } else rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL); return rc; @@ -875,11 +873,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) if (!(mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { rc = -EIO; - printk(KERN_WARNING "Attempt to read file that " + printk(KERN_WARNING "Either the lower file " "is not in a valid eCryptfs format, " - "and plaintext passthrough mode is not " + "or the key could not be retrieved. 
" + "Plaintext passthrough mode is not " "enabled; returning -EIO\n"); - mutex_unlock(&crypt_stat->cs_mutex); goto out; } @@ -954,7 +952,7 @@ out: return rc; } -ssize_t +static ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index f458c1f35565..682b1b2482c2 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -189,7 +189,7 @@ out: } static int -parse_tag_65_packet(struct ecryptfs_session_key *session_key, u16 *cipher_code, +parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, struct ecryptfs_message *msg) { size_t i = 0; @@ -275,7 +275,7 @@ out: static int -write_tag_66_packet(char *signature, size_t cipher_code, +write_tag_66_packet(char *signature, u8 cipher_code, struct ecryptfs_crypt_stat *crypt_stat, char **packet, size_t *packet_len) { @@ -428,7 +428,7 @@ static int decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat) { - u16 cipher_code = 0; + u8 cipher_code = 0; struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; @@ -1537,7 +1537,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, struct scatterlist dst_sg; struct scatterlist src_sg; struct mutex *tfm_mutex = NULL; - size_t cipher_code; + u8 cipher_code; size_t packet_size_length; size_t max_packet_size; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 0249aa4ae181..d25ac9500a92 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...) 
* * Returns zero on success; non-zero otherwise */ -int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) +static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) { struct ecryptfs_inode_info *inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); @@ -226,17 +226,15 @@ out: return rc; } -enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug, - ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher, - ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, +enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, + ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, + ecryptfs_opt_ecryptfs_key_bytes, ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; static match_table_t tokens = { {ecryptfs_opt_sig, "sig=%s"}, {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, - {ecryptfs_opt_debug, "debug=%u"}, - {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"}, {ecryptfs_opt_cipher, "cipher=%s"}, {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"}, {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"}, @@ -313,7 +311,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) substring_t args[MAX_OPT_ARGS]; int token; char *sig_src; - char *debug_src; char *cipher_name_dst; char *cipher_name_src; char *cipher_key_bytes_src; @@ -341,16 +338,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) } sig_set = 1; break; - case ecryptfs_opt_debug: - case ecryptfs_opt_ecryptfs_debug: - debug_src = args[0].from; - ecryptfs_verbosity = - (int)simple_strtol(debug_src, &debug_src, - 0); - ecryptfs_printk(KERN_DEBUG, - "Verbosity set to [%d]" "\n", - ecryptfs_verbosity); - break; case ecryptfs_opt_cipher: case ecryptfs_opt_ecryptfs_cipher: cipher_name_src = args[0].from; @@ -423,9 +410,13 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) if (!cipher_key_bytes_set) { mount_crypt_stat->global_default_cipher_key_size = 
0; } - rc = ecryptfs_add_new_key_tfm( - NULL, mount_crypt_stat->global_default_cipher_name, - mount_crypt_stat->global_default_cipher_key_size); + mutex_lock(&key_tfm_list_mutex); + if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, + NULL)) + rc = ecryptfs_add_new_key_tfm( + NULL, mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + mutex_unlock(&key_tfm_list_mutex); if (rc) { printk(KERN_ERR "Error attempting to initialize cipher with " "name = [%s] and key size = [%td]; rc = [%d]\n", @@ -522,8 +513,8 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); goto out; } - lower_root = nd.dentry; - lower_mnt = nd.mnt; + lower_root = nd.path.dentry; + lower_mnt = nd.path.mnt; ecryptfs_set_superblock_lower(sb, lower_root->d_sb); sb->s_maxbytes = lower_root->d_sb->s_maxbytes; sb->s_blocksize = lower_root->d_sb->s_blocksize; @@ -535,7 +526,7 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) rc = 0; goto out; out_free: - path_release(&nd); + path_put(&nd.path); out: return rc; } @@ -654,11 +645,6 @@ static struct ecryptfs_cache_info { .size = sizeof(struct ecryptfs_sb_info), }, { - .cache = &ecryptfs_header_cache_0, - .name = "ecryptfs_headers_0", - .size = PAGE_CACHE_SIZE, - }, - { .cache = &ecryptfs_header_cache_1, .name = "ecryptfs_headers_1", .size = PAGE_CACHE_SIZE, @@ -821,6 +807,10 @@ static int __init ecryptfs_init(void) "rc = [%d]\n", rc); goto out_release_messaging; } + if (ecryptfs_verbosity > 0) + printk(KERN_CRIT "eCryptfs verbosity set to %d. 
Secret values " + "will be written to the syslog!\n", ecryptfs_verbosity); + goto out; out_release_messaging: ecryptfs_release_messaging(ecryptfs_transport); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 0535412d8c64..dc74b186145d 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -34,8 +34,6 @@ #include <linux/scatterlist.h> #include "ecryptfs_kernel.h" -struct kmem_cache *ecryptfs_lower_page_cache; - /** * ecryptfs_get_locked_page * @@ -102,13 +100,14 @@ static void set_header_info(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat) { size_t written; - int save_num_header_extents_at_front = - crypt_stat->num_header_extents_at_front; + size_t save_num_header_bytes_at_front = + crypt_stat->num_header_bytes_at_front; - crypt_stat->num_header_extents_at_front = 1; + crypt_stat->num_header_bytes_at_front = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written); - crypt_stat->num_header_extents_at_front = - save_num_header_extents_at_front; + crypt_stat->num_header_bytes_at_front = + save_num_header_bytes_at_front; } /** @@ -134,8 +133,11 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, loff_t view_extent_num = ((((loff_t)page->index) * num_extents_per_page) + extent_num_in_page); + size_t num_header_extents_at_front = + (crypt_stat->num_header_bytes_at_front + / crypt_stat->extent_size); - if (view_extent_num < crypt_stat->num_header_extents_at_front) { + if (view_extent_num < num_header_extents_at_front) { /* This is a header extent */ char *page_virt; @@ -157,9 +159,8 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, } else { /* This is an encrypted data extent */ loff_t lower_offset = - ((view_extent_num - - crypt_stat->num_header_extents_at_front) - * crypt_stat->extent_size); + ((view_extent_num * crypt_stat->extent_size) + - crypt_stat->num_header_bytes_at_front); rc = ecryptfs_read_lower_page_segment( page, (lower_offset >> PAGE_CACHE_SHIFT), diff --git 
a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 948f57624c05..0c4928623bbc 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -293,6 +293,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, return rc; } +#if 0 /** * ecryptfs_read * @data: The virtual address into which to write the data read (and @@ -371,3 +372,4 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, out: return rc; } +#endif /* 0 */ diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 4859c4eecd65..c27ac2b358a1 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -156,32 +156,38 @@ static void ecryptfs_clear_inode(struct inode *inode) /** * ecryptfs_show_options * - * Prints the directory we are currently mounted over. - * Returns zero on success; non-zero otherwise + * Prints the mount options for a given superblock. + * Returns zero; does not fail. */ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; - struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root); - struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root); - char *tmp_page; - char *path; - int rc = 0; - - tmp_page = (char *)__get_free_page(GFP_KERNEL); - if (!tmp_page) { - rc = -ENOMEM; - goto out; - } - path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE); - if (IS_ERR(path)) { - rc = PTR_ERR(path); - goto out; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; + struct ecryptfs_global_auth_tok *walker; + + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(walker, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + seq_printf(m, ",ecryptfs_sig=%s", walker->sig); } - seq_printf(m, ",dir=%s", path); - free_page((unsigned long)tmp_page); -out: - return rc; + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + + seq_printf(m, 
",ecryptfs_cipher=%s", + mount_crypt_stat->global_default_cipher_name); + + if (mount_crypt_stat->global_default_cipher_key_size) + seq_printf(m, ",ecryptfs_key_bytes=%zd", + mount_crypt_stat->global_default_cipher_key_size); + if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) + seq_printf(m, ",ecryptfs_passthrough"); + if (mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) + seq_printf(m, ",ecryptfs_xattr_metadata"); + if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + seq_printf(m, ",ecryptfs_encrypted_view"); + + return 0; } const struct super_operations ecryptfs_sops = { diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 174696f9bf14..627c3026946d 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -45,17 +45,26 @@ static inline void extent_copy(efs_extent *src, efs_extent *dst) { return; } -void efs_read_inode(struct inode *inode) +struct inode *efs_iget(struct super_block *super, unsigned long ino) { int i, inode_index; dev_t device; u32 rdev; struct buffer_head *bh; - struct efs_sb_info *sb = SUPER_INFO(inode->i_sb); - struct efs_inode_info *in = INODE_INFO(inode); + struct efs_sb_info *sb = SUPER_INFO(super); + struct efs_inode_info *in; efs_block_t block, offset; struct efs_dinode *efs_inode; - + struct inode *inode; + + inode = iget_locked(super, ino); + if (IS_ERR(inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + in = INODE_INFO(inode); + /* ** EFS layout: ** @@ -159,13 +168,13 @@ void efs_read_inode(struct inode *inode) break; } - return; + unlock_new_inode(inode); + return inode; read_inode_error: printk(KERN_WARNING "EFS: failed to read inode %lu\n", inode->i_ino); - make_bad_inode(inode); - - return; + iget_failed(inode); + return ERR_PTR(-EIO); } static inline efs_block_t diff --git a/fs/efs/namei.c b/fs/efs/namei.c index f7f407075be1..e26704742d41 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -66,9 +66,10 @@ struct dentry *efs_lookup(struct inode *dir, struct 
dentry *dentry, struct namei lock_kernel(); inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); if (inodenum) { - if (!(inode = iget(dir->i_sb, inodenum))) { + inode = efs_iget(dir->i_sb, inodenum); + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); @@ -84,12 +85,11 @@ static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, if (ino == 0) return ERR_PTR(-ESTALE); - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); + inode = efs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -116,7 +116,7 @@ struct dentry *efs_get_parent(struct dentry *child) struct dentry *parent; struct inode *inode; efs_ino_t ino; - int error; + long error; lock_kernel(); @@ -125,10 +125,11 @@ struct dentry *efs_get_parent(struct dentry *child) if (!ino) goto fail; - error = -EACCES; - inode = iget(child->d_inode->i_sb, ino); - if (!inode) + inode = efs_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); goto fail; + } error = -ENOMEM; parent = d_alloc_anon(inode); diff --git a/fs/efs/super.c b/fs/efs/super.c index c79bc627f107..14082405cdd1 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -107,7 +107,6 @@ static int efs_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations efs_superblock_operations = { .alloc_inode = efs_alloc_inode, .destroy_inode = efs_destroy_inode, - .read_inode = efs_read_inode, .put_super = efs_put_super, .statfs = efs_statfs, .remount_fs = efs_remount, @@ -247,6 +246,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) struct efs_sb_info *sb; struct buffer_head *bh; struct inode *root; + int ret = -EINVAL; sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL); if 
(!sb) @@ -303,12 +303,18 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) } s->s_op = &efs_superblock_operations; s->s_export_op = &efs_export_ops; - root = iget(s, EFS_ROOTINODE); + root = efs_iget(s, EFS_ROOTINODE); + if (IS_ERR(root)) { + printk(KERN_ERR "EFS: get root inode failed\n"); + ret = PTR_ERR(root); + goto out_no_fs; + } + s->s_root = d_alloc_root(root); - if (!(s->s_root)) { - printk(KERN_ERR "EFS: get root inode failed\n"); + printk(KERN_ERR "EFS: get root dentry failed\n"); iput(root); + ret = -ENOMEM; goto out_no_fs; } @@ -318,7 +324,7 @@ out_no_fs_ul: out_no_fs: s->s_fs_info = NULL; kfree(sb); - return -EINVAL; + return ret; } static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/eventfd.c b/fs/eventfd.c index 2ce19c000d2a..a9f130cd50ac 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/anon_inodes.h> #include <linux/eventfd.h> +#include <linux/syscalls.h> struct eventfd_ctx { wait_queue_head_t wqh; diff --git a/fs/exec.c b/fs/exec.c index be923e4bc389..a44b142fb460 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -112,14 +112,14 @@ asmlinkage long sys_uselib(const char __user * library) goto out; error = -EINVAL; - if (!S_ISREG(nd.dentry->d_inode->i_mode)) + if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) goto exit; error = vfs_permission(&nd, MAY_READ | MAY_EXEC); if (error) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -148,7 +148,7 @@ out: return error; exit: release_open_intent(&nd); - path_release(&nd); + path_put(&nd.path); goto out; } @@ -652,13 +652,14 @@ struct file *open_exec(const char *name) file = ERR_PTR(err); if (!err) { - struct inode *inode = nd.dentry->d_inode; + struct inode *inode = nd.path.dentry->d_inode; file = ERR_PTR(-EACCES); if (S_ISREG(inode->i_mode)) { int err = vfs_permission(&nd, MAY_EXEC); file = 
ERR_PTR(err); if (!err) { - file = nameidata_to_filp(&nd, O_RDONLY); + file = nameidata_to_filp(&nd, + O_RDONLY|O_LARGEFILE); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -671,7 +672,7 @@ out: } } release_open_intent(&nd); - path_release(&nd); + path_put(&nd.path); } goto out; } @@ -782,26 +783,8 @@ static int de_thread(struct task_struct *tsk) zap_other_threads(tsk); read_unlock(&tasklist_lock); - /* - * Account for the thread group leader hanging around: - */ - count = 1; - if (!thread_group_leader(tsk)) { - count = 2; - /* - * The SIGALRM timer survives the exec, but needs to point - * at us as the new group leader now. We have a race with - * a timer firing now getting the old leader, so we need to - * synchronize with any firing (by calling del_timer_sync) - * before we can safely let the old group leader die. - */ - sig->tsk = tsk; - spin_unlock_irq(lock); - if (hrtimer_cancel(&sig->real_timer)) - hrtimer_restart(&sig->real_timer); - spin_lock_irq(lock); - } - + /* Account for the thread group leader hanging around: */ + count = thread_group_leader(tsk) ? 1 : 2; sig->notify_count = count; while (atomic_read(&sig->count) > count) { __set_current_state(TASK_UNINTERRUPTIBLE); @@ -1184,7 +1167,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int try,retval; struct linux_binfmt *fmt; -#ifdef __alpha__ +#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT) /* handle /sbin/loader.. 
*/ { struct exec * eh = (struct exec *) bprm->buf; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 377ad172d74b..e7b2bafa1dd9 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -69,9 +69,53 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, return desc + offset; } +static int ext2_valid_block_bitmap(struct super_block *sb, + struct ext2_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) +{ + ext2_grpblk_t offset; + ext2_grpblk_t next_zero_bit; + ext2_fsblk_t bitmap_blk; + ext2_fsblk_t group_first_block; + + group_first_block = ext2_group_first_block_no(sb, block_group); + + /* check whether block bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext2_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext2_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode table block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_table); + offset = bitmap_blk - group_first_block; + next_zero_bit = ext2_find_next_zero_bit(bh->b_data, + offset + EXT2_SB(sb)->s_itb_per_group, + offset); + if (next_zero_bit >= offset + EXT2_SB(sb)->s_itb_per_group) + /* good bitmap for inode tables */ + return 1; + +err_out: + ext2_error(sb, __FUNCTION__, + "Invalid block bitmap - " + "block_group = %d, block = %lu", + block_group, bitmap_blk); + return 0; +} + /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group,and validate the + * bits for block/inode/inode tables are set in the bitmaps * * Return buffer_head on success or NULL in case of failure. 
*/ @@ -80,17 +124,36 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext2_group_desc * desc; struct buffer_head * bh = NULL; - - desc = ext2_get_group_desc (sb, block_group, NULL); + ext2_fsblk_t bitmap_blk; + + desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); - if (!bh) - ext2_error (sb, "read_block_bitmap", + return NULL; + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext2_error(sb, __FUNCTION__, + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); + return NULL; + } + if (likely(bh_uptodate_or_lock(bh))) + return bh; + + if (bh_submit_read(bh) < 0) { + brelse(bh); + ext2_error(sb, __FUNCTION__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %u", block_group, le32_to_cpu(desc->bg_block_bitmap)); -error_out: + return NULL; + } + if (!ext2_valid_block_bitmap(sb, desc, block_group, bh)) { + brelse(bh); + return NULL; + } + return bh; } @@ -474,11 +537,13 @@ do_more: in_range (block, le32_to_cpu(desc->bg_inode_table), sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) + sbi->s_itb_per_group)) { ext2_error (sb, "ext2_free_blocks", "Freeing blocks in system zones - " "Block = %lu, count = %lu", block, count); + goto error_return; + } for (i = 0, group_freed = 0; i < count; i++) { if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), @@ -1250,8 +1315,8 @@ retry_alloc: smp_rmb(); /* - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. + * Now search the rest of the groups. We assume that + * group_no and gdp correctly point to the last group visited. 
*/ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -1311,11 +1376,13 @@ allocated: in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), EXT2_SB(sb)->s_itb_per_group) || in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT2_SB(sb)->s_itb_per_group)) + EXT2_SB(sb)->s_itb_per_group)) { ext2_error(sb, "ext2_new_blocks", "Allocating block in system zone - " "blocks from "E2FSBLK", length %lu", ret_block, num); + goto out; + } performed_allocation = 1; @@ -1466,9 +1533,6 @@ int ext2_bg_has_super(struct super_block *sb, int group) */ unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) { - if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& - !ext2_group_sparse(group)) - return 0; - return EXT2_SB(sb)->s_gdb_count; + return ext2_bg_has_super(sb, group) ? EXT2_SB(sb)->s_gdb_count : 0; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index d868e26c15eb..8dededd80fe2 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -703,7 +703,7 @@ const struct file_operations ext2_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext2_readdir, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index c87ae29c19cb..47d88da2d33b 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -124,9 +124,8 @@ extern void ext2_check_inodes_bitmap (struct super_block *); extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ -extern void ext2_read_inode (struct inode *); +extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, int); -extern void ext2_put_inode (struct inode *); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); @@ -139,8 +138,7 @@ int __ext2_write_begin(struct file *file, struct 
address_space *mapping, struct page **pagep, void **fsdata); /* ioctl.c */ -extern int ext2_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext2_ioctl(struct file *, unsigned int, unsigned long); extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index c051798459a1..5f2fa9c36293 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -48,7 +48,7 @@ const struct file_operations ext2_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif @@ -65,7 +65,7 @@ const struct file_operations ext2_xip_file_operations = { .llseek = generic_file_llseek, .read = xip_file_read, .write = xip_file_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b1ab32ab5a77..c62006805427 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -286,15 +286,12 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) * ext2_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain * * Returns preferred place for a block (the goal). */ -static inline int ext2_find_goal(struct inode *inode, - long block, - Indirect chain[4], +static inline int ext2_find_goal(struct inode *inode, long block, Indirect *partial) { struct ext2_block_alloc_info *block_i; @@ -569,7 +566,6 @@ static void ext2_splice_branch(struct inode *inode, * * `handle' can be NULL if create == 0. * - * The BKL may not be held on entry here. Be sure to take it early. * return > 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return < 0, error case. 
@@ -639,7 +635,7 @@ reread: if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext2_init_block_alloc_info(inode); - goal = ext2_find_goal(inode, iblock, chain, partial); + goal = ext2_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -1185,22 +1181,33 @@ void ext2_get_inode_flags(struct ext2_inode_info *ei) ei->i_flags |= EXT2_DIRSYNC_FL; } -void ext2_read_inode (struct inode * inode) +struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { - struct ext2_inode_info *ei = EXT2_I(inode); - ino_t ino = inode->i_ino; + struct ext2_inode_info *ei; struct buffer_head * bh; - struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + struct ext2_inode *raw_inode; + struct inode *inode; + long ret = -EIO; int n; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT2_I(inode); #ifdef CONFIG_EXT2_FS_POSIX_ACL ei->i_acl = EXT2_ACL_NOT_CACHED; ei->i_default_acl = EXT2_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (IS_ERR(raw_inode)) + raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + if (IS_ERR(raw_inode)) { + ret = PTR_ERR(raw_inode); goto bad_inode; + } inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); @@ -1224,6 +1231,7 @@ void ext2_read_inode (struct inode * inode) if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); @@ -1290,11 +1298,12 @@ void ext2_read_inode (struct inode * inode) } brelse (bh); ext2_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } static int ext2_update_inode(struct inode * inode, int do_sync) diff --git 
a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 320b2cb3d4d2..b8ea11fee5c6 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -17,9 +17,9 @@ #include <asm/uaccess.h> -int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct ext2_inode_info *ei = EXT2_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -141,9 +141,6 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, #ifdef CONFIG_COMPAT long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT2_IOC32_GETFLAGS: @@ -161,9 +158,6 @@ long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext2_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e69beed839ac..80c97fd8c571 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -63,9 +63,9 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str ino = ext2_inode_by_name(dir, dentry); inode = NULL; if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + inode = ext2_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -83,10 +83,10 @@ struct dentry *ext2_get_parent(struct dentry *child) ino = ext2_inode_by_name(child->d_inode, &dotdot); if (!ino) return ERR_PTR(-ENOENT); - inode = iget(child->d_inode->i_sb, ino); + inode = ext2_iget(child->d_inode->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + if (IS_ERR(inode)) + return 
ERR_CAST(inode); parent = d_alloc_anon(inode); if (!parent) { iput(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 6abaf75163f0..088b011bb97e 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -234,16 +234,16 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) le16_to_cpu(es->s_def_resgid) != EXT2_DEF_RESGID) { seq_printf(seq, ",resgid=%u", sbi->s_resgid); } - if (test_opt(sb, ERRORS_CONT)) { + if (test_opt(sb, ERRORS_RO)) { int def_errors = le16_to_cpu(es->s_errors); if (def_errors == EXT2_ERRORS_PANIC || - def_errors == EXT2_ERRORS_RO) { - seq_puts(seq, ",errors=continue"); + def_errors == EXT2_ERRORS_CONTINUE) { + seq_puts(seq, ",errors=remount-ro"); } } - if (test_opt(sb, ERRORS_RO)) - seq_puts(seq, ",errors=remount-ro"); + if (test_opt(sb, ERRORS_CONT)) + seq_puts(seq, ",errors=continue"); if (test_opt(sb, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (test_opt(sb, NO_UID32)) @@ -285,6 +285,9 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",xip"); #endif + if (!test_opt(sb, RESERVATION)) + seq_puts(seq, ",noreservation"); + return 0; } @@ -296,7 +299,6 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *da static const struct super_operations ext2_sops = { .alloc_inode = ext2_alloc_inode, .destroy_inode = ext2_destroy_inode, - .read_inode = ext2_read_inode, .write_inode = ext2_write_inode, .delete_inode = ext2_delete_inode, .put_super = ext2_put_super, @@ -326,11 +328,10 @@ static struct inode *ext2_nfs_get_inode(struct super_block *sb, * it might be "neater" to call ext2_get_inode first and check * if the inode is valid..... 
*/ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext2_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { /* we didn't find the right inode.. */ iput(inode); return ERR_PTR(-ESTALE); @@ -617,27 +618,24 @@ static int ext2_setup_super (struct super_block * sb, return res; } -static int ext2_check_descriptors (struct super_block * sb) +static int ext2_check_descriptors(struct super_block *sb) { int i; - int desc_block = 0; struct ext2_sb_info *sbi = EXT2_SB(sb); unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block); unsigned long last_block; - struct ext2_group_desc * gdp = NULL; ext2_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1) last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; else last_block = first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data; if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) { @@ -667,7 +665,6 @@ static int ext2_check_descriptors (struct super_block * sb) return 0; } first_block += EXT2_BLOCKS_PER_GROUP(sb); - gdp++; } return 1; } @@ -750,6 +747,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; + long ret = -EINVAL; int blocksize = BLOCK_SIZE; int db_count; int i, j; @@ -820,10 +818,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if 
(le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) - set_opt(sbi->s_mount_opt, ERRORS_RO); - else + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE) set_opt(sbi->s_mount_opt, ERRORS_CONT); + else + set_opt(sbi->s_mount_opt, ERRORS_RO); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); @@ -868,8 +866,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) || - (sb->s_blocksize != blocksize))) { + if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { if (!silent) printk("XIP: Unsupported blocksize\n"); goto failed_mount; @@ -1046,19 +1043,24 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext2_sops; sb->s_export_op = &ext2_export_ops; sb->s_xattr = ext2_xattr_handlers; - root = iget(sb, EXT2_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - printk(KERN_ERR "EXT2-fs: get root inode failed\n"); + root = ext2_iget(sb, EXT2_ROOT_INO); + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); goto failed_mount3; } + + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + iput(root); + printk(KERN_ERR "EXT2-fs: get root inode failed\n"); + ret = -ENOMEM; + goto failed_mount3; + } if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_warning(sb, __FUNCTION__, "mounting ext3 filesystem as ext2"); @@ -1085,7 +1087,7 @@ failed_mount: failed_sbi: sb->s_fs_info = NULL; kfree(sbi); - return -EINVAL; + return ret; } static void ext2_commit_super (struct super_block * sb, diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a8ba7e831278..da0cb2c0e437 100644 --- a/fs/ext3/balloc.c +++ 
b/fs/ext3/balloc.c @@ -80,13 +80,57 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, return desc + offset; } +static int ext3_valid_block_bitmap(struct super_block *sb, + struct ext3_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) +{ + ext3_grpblk_t offset; + ext3_grpblk_t next_zero_bit; + ext3_fsblk_t bitmap_blk; + ext3_fsblk_t group_first_block; + + group_first_block = ext3_group_first_block_no(sb, block_group); + + /* check whether block bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext3_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext3_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode table block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_table); + offset = bitmap_blk - group_first_block; + next_zero_bit = ext3_find_next_zero_bit(bh->b_data, + offset + EXT3_SB(sb)->s_itb_per_group, + offset); + if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group) + /* good bitmap for inode tables */ + return 1; + +err_out: + ext3_error(sb, __FUNCTION__, + "Invalid block bitmap - " + "block_group = %d, block = %lu", + block_group, bitmap_blk); + return 0; +} + /** * read_block_bitmap() * @sb: super block * @block_group: given block group * - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group,and validate the + * bits for block/inode/inode tables are set in the bitmaps * * Return buffer_head on success or NULL in case of failure. 
*/ @@ -95,17 +139,35 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; struct buffer_head * bh = NULL; + ext3_fsblk_t bitmap_blk; - desc = ext3_get_group_desc (sb, block_group, NULL); + desc = ext3_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); - if (!bh) - ext3_error (sb, "read_block_bitmap", + return NULL; + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext3_error(sb, __FUNCTION__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %u", block_group, le32_to_cpu(desc->bg_block_bitmap)); -error_out: + return NULL; + } + if (likely(bh_uptodate_or_lock(bh))) + return bh; + + if (bh_submit_read(bh) < 0) { + brelse(bh); + ext3_error(sb, __FUNCTION__, + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); + return NULL; + } + if (!ext3_valid_block_bitmap(sb, desc, block_group, bh)) { + brelse(bh); + return NULL; + } return bh; } /* @@ -468,11 +530,13 @@ do_more: in_range (block, le32_to_cpu(desc->bg_inode_table), sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) + sbi->s_itb_per_group)) { ext3_error (sb, "ext3_free_blocks", "Freeing blocks in system zones - " "Block = "E3FSBLK", count = %lu", block, count); + goto error_return; + } /* * We are about to start releasing blocks in the bitmap, @@ -566,9 +630,7 @@ do_more: jbd_unlock_bh_state(bitmap_bh); spin_lock(sb_bgl_lock(sbi, block_group)); - desc->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + - group_freed); + le16_add_cpu(&desc->bg_free_blocks_count, group_freed); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); @@ -1508,7 +1570,7 @@ retry_alloc: /* * Now search the rest of the groups. 
We assume that - * i and gdp correctly point to the last group visited. + * group_no and gdp correctly point to the last group visited. */ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -1575,11 +1637,13 @@ allocated: in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), EXT3_SB(sb)->s_itb_per_group) || in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT3_SB(sb)->s_itb_per_group)) + EXT3_SB(sb)->s_itb_per_group)) { ext3_error(sb, "ext3_new_block", "Allocating block in system zone - " "blocks from "E3FSBLK", length %lu", ret_block, num); + goto out; + } performed_allocation = 1; @@ -1630,8 +1694,7 @@ allocated: ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + le16_add_cpu(&gdp->bg_free_blocks_count, -num); spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_sub(&sbi->s_freeblocks_counter, num); @@ -1782,11 +1845,7 @@ static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) { - if (EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext3_group_sparse(group)) - return 0; - return EXT3_SB(sb)->s_gdb_count; + return ext3_bg_has_super(sb, group) ? 
EXT3_SB(sb)->s_gdb_count : 0; } /** diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 1bc8cd89c51d..4f4020c54683 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -164,11 +164,9 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) if (gdp) { spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_inodes_count = cpu_to_le16( - le16_to_cpu(gdp->bg_free_inodes_count) + 1); + le16_add_cpu(&gdp->bg_free_inodes_count, 1); if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); + le16_add_cpu(&gdp->bg_used_dirs_count, -1); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) @@ -527,11 +525,9 @@ got: err = ext3_journal_get_write_access(handle, bh2); if (err) goto fail; spin_lock(sb_bgl_lock(sbi, group)); - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + le16_add_cpu(&gdp->bg_free_inodes_count, -1); if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + le16_add_cpu(&gdp->bg_used_dirs_count, 1); } spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); @@ -642,14 +638,15 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); unsigned long block_group; int bit; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; struct inode *inode = NULL; + long err = -EIO; /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext3_warning(sb, __FUNCTION__, "bad orphan ino %lu! 
e2fsck was run?", ino); - goto out; + goto error; } block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); @@ -658,38 +655,49 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) if (!bitmap_bh) { ext3_warning(sb, __FUNCTION__, "inode bitmap error for orphan %lu", ino); - goto out; + goto error; } /* Having the inode bit set should be a 100% indicator that this * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. */ - if (!ext3_test_bit(bit, bitmap_bh->b_data) || - !(inode = iget(sb, ino)) || is_bad_inode(inode) || - NEXT_ORPHAN(inode) > max_ino) { - ext3_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext3_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } + if (!ext3_test_bit(bit, bitmap_bh->b_data)) + goto bad_orphan; + + inode = ext3_iget(sb, ino); + if (IS_ERR(inode)) + goto iget_failed; + + if (NEXT_ORPHAN(inode) > max_ino) + goto bad_orphan; + brelse(bitmap_bh); + return inode; + +iget_failed: + err = PTR_ERR(inode); + inode = NULL; +bad_orphan: + ext3_warning(sb, __FUNCTION__, + "bad orphan inode %lu! 
e2fsck was run?", ino); + printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext3_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) + if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); - inode = NULL; } -out: brelse(bitmap_bh); - return inode; +error: + return ERR_PTR(err); } unsigned long ext3_count_free_inodes (struct super_block * sb) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 077535439288..eb95670a27eb 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -439,16 +439,14 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) * ext3_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain - * @goal: place to store the result. * * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. + * returns it. 
*/ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, - Indirect chain[4], Indirect *partial) + Indirect *partial) { struct ext3_block_alloc_info *block_i; @@ -884,7 +882,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext3_init_block_alloc_info(inode); - goal = ext3_find_goal(inode, iblock, chain, partial); + goal = ext3_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -941,55 +939,45 @@ out: return err; } -#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 +/* + * Number of credits we need for writing DIO_MAX_BLOCKS: + * We need sb + group descriptor + bitmap + inode -> 4 + * For B blocks with A block pointers per block we need: + * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). + * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. + */ +#define DIO_CREDITS 25 static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext3_journal_current_handle(); - int ret = 0; + int ret = 0, started = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - if (!create) - goto get_block; /* A read */ - - if (max_blocks == 1) - goto get_block; /* A single block get */ - - if (handle->h_transaction->t_state == T_LOCKED) { - /* - * Huge direct-io writes can hold off commits for long - * periods of time. Let this commit run. - */ - ext3_journal_stop(handle); - handle = ext3_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) + if (create && !handle) { /* Direct IO write... 
*/ + if (max_blocks > DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + handle = ext3_journal_start(inode, DIO_CREDITS + + 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto get_block; - } - - if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) { - /* - * Getting low on buffer credits... - */ - ret = ext3_journal_extend(handle, DIO_CREDITS); - if (ret > 0) { - /* - * Couldn't extend the transaction. Start a new one. - */ - ret = ext3_journal_restart(handle, DIO_CREDITS); + goto out; } + started = 1; } -get_block: - if (ret == 0) { - ret = ext3_get_blocks_handle(handle, inode, iblock, + ret = ext3_get_blocks_handle(handle, inode, iblock, max_blocks, bh_result, create, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; } + if (started) + ext3_journal_stop(handle); +out: return ret; } @@ -1680,7 +1668,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. 
*/ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, @@ -1689,7 +1678,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ext3_inode_info *ei = EXT3_I(inode); - handle_t *handle = NULL; + handle_t *handle; ssize_t ret; int orphan = 0; size_t count = iov_length(iov, nr_segs); @@ -1697,17 +1686,21 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { loff_t final_size = offset + count; - handle = ext3_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext3_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } ret = ext3_orphan_add(handle, inode); - if (ret) - goto out_stop; + if (ret) { + ext3_journal_stop(handle); + goto out; + } orphan = 1; ei->i_disksize = inode->i_size; + ext3_journal_stop(handle); } } @@ -1715,18 +1708,21 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, ext3_get_block, NULL); - /* - * Reacquire the handle: ext3_get_block() can restart the transaction - */ - handle = ext3_journal_current_handle(); - -out_stop: - if (handle) { + if (orphan) { int err; - if (orphan && inode->i_nlink) + /* Credits for sb + inode write */ + handle = ext3_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... 
*/ + ret = PTR_ERR(handle); + goto out; + } + if (inode->i_nlink) ext3_orphan_del(handle, inode); - if (orphan && ret > 0) { + if (ret > 0) { loff_t end = offset + ret; if (end > inode->i_size) { ei->i_disksize = end; @@ -2658,21 +2654,31 @@ void ext3_get_inode_flags(struct ext3_inode_info *ei) ei->i_flags |= EXT3_DIRSYNC_FL; } -void ext3_read_inode(struct inode * inode) +struct inode *ext3_iget(struct super_block *sb, unsigned long ino) { struct ext3_iloc iloc; struct ext3_inode *raw_inode; - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext3_inode_info *ei; struct buffer_head *bh; + struct inode *inode; + long ret; int block; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT3_I(inode); #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; ei->i_default_acl = EXT3_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (__ext3_get_inode_loc(inode, &iloc, 0)) + ret = __ext3_get_inode_loc(inode, &iloc, 0); + if (ret < 0) goto bad_inode; bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); @@ -2703,6 +2709,7 @@ void ext3_read_inode(struct inode * inode) !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } /* The only unlinked inodes we let through here have @@ -2746,6 +2753,7 @@ void ext3_read_inode(struct inode * inode) if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT3_INODE_SIZE(inode->i_sb)) { brelse (bh); + ret = -EIO; goto bad_inode; } if (ei->i_extra_isize == 0) { @@ -2787,11 +2795,12 @@ void ext3_read_inode(struct inode * inode) } brelse (iloc.bh); ext3_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } /* diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4ab6f76e63d0..dec3e0d88ab1 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -860,14 +860,10 @@ 
static struct buffer_head * ext3_find_entry (struct dentry *dentry, int nblocks, i, err; struct inode *dir = dentry->d_parent->d_inode; int namelen; - const u8 *name; - unsigned blocksize; *res_dir = NULL; sb = dir->i_sb; - blocksize = sb->s_blocksize; namelen = dentry->d_name.len; - name = dentry->d_name.name; if (namelen > EXT3_NAME_LEN) return NULL; if (is_dx(dir)) { @@ -1041,17 +1037,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str if (!ext3_valid_inum(dir->i_sb, ino)) { ext3_error(dir->i_sb, "ext3_lookup", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext3_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -1080,18 +1070,13 @@ struct dentry *ext3_get_parent(struct dentry *child) if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { ext3_error(child->d_inode->i_sb, "ext3_get_parent", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext3_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + parent = d_alloc_anon(inode); if (!parent) { iput(inode); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 44de1453c301..9397d779c43d 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -518,8 +518,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT3_SB(sb)->s_gdb_count++; kfree(o_group_desc); - es->s_reserved_gdt_blocks = - cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); + le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); return 0; @@ -795,12 +794,11 @@ int 
ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = iget(sb, EXT3_RESIZE_INO); - if (!inode || is_bad_inode(inode)) { + inode = ext3_iget(sb, EXT3_RESIZE_INO); + if (IS_ERR(inode)) { ext3_warning(sb, __FUNCTION__, "Error opening resize inode"); - iput(inode); - return -ENOENT; + return PTR_ERR(inode); } } @@ -891,10 +889,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) * blocks/inodes before the group is live won't actually let us * allocate the new space yet. */ - es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + - input->blocks_count); - es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + - EXT3_INODES_PER_GROUP(sb)); + le32_add_cpu(&es->s_blocks_count, input->blocks_count); + le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb)); /* * We need to protect s_groups_count against other CPUs seeing @@ -927,8 +923,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) /* Update the reserved block counts only once the new group is * active. 
*/ - es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) + - input->reserved_blocks); + le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeblocks_counter, diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f3675cc630e9..18769cc32377 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -575,16 +575,16 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { seq_printf(seq, ",resgid=%u", sbi->s_resgid); } - if (test_opt(sb, ERRORS_CONT)) { + if (test_opt(sb, ERRORS_RO)) { int def_errors = le16_to_cpu(es->s_errors); if (def_errors == EXT3_ERRORS_PANIC || - def_errors == EXT3_ERRORS_RO) { - seq_puts(seq, ",errors=continue"); + def_errors == EXT3_ERRORS_CONTINUE) { + seq_puts(seq, ",errors=remount-ro"); } } - if (test_opt(sb, ERRORS_RO)) - seq_puts(seq, ",errors=remount-ro"); + if (test_opt(sb, ERRORS_CONT)) + seq_puts(seq, ",errors=continue"); if (test_opt(sb, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (test_opt(sb, NO_UID32)) @@ -649,11 +649,10 @@ static struct inode *ext3_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext3_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -722,7 +721,6 @@ static struct quotactl_ops ext3_qctl_operations = { static const struct super_operations ext3_sops = { .alloc_inode = ext3_alloc_inode, .destroy_inode = ext3_destroy_inode, - .read_inode = ext3_read_inode, .write_inode = ext3_write_inode, .dirty_inode = ext3_dirty_inode, .delete_inode = ext3_delete_inode, @@ -1224,7 +1222,7 @@ static int 
ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, #endif if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); - es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + le16_add_cpu(&es->s_mnt_count, 1); es->s_mtime = cpu_to_le32(get_seconds()); ext3_update_dynamic_rev(sb); EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); @@ -1252,28 +1250,24 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, } /* Called at mount-time, super-block is locked */ -static int ext3_check_descriptors (struct super_block * sb) +static int ext3_check_descriptors(struct super_block *sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext3_fsblk_t last_block; - struct ext3_group_desc * gdp = NULL; - int desc_block = 0; int i; ext3_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1) last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; else last_block = first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext3_group_desc *) - sbi->s_group_desc[desc_block++]->b_data; if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) { @@ -1306,7 +1300,6 @@ static int ext3_check_descriptors (struct super_block * sb) return 0; } first_block += EXT3_BLOCKS_PER_GROUP(sb); - gdp++; } sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); @@ -1383,8 +1376,8 @@ static void ext3_orphan_cleanup (struct super_block * sb, while (es->s_last_orphan) { struct inode *inode; - if (!(inode = - ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); + if (IS_ERR(inode)) { 
es->s_last_orphan = 0; break; } @@ -1513,6 +1506,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) int db_count; int i; int needs_recovery; + int ret = -EINVAL; __le32 features; int err; @@ -1583,10 +1577,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) - set_opt(sbi->s_mount_opt, ERRORS_RO); - else + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) set_opt(sbi->s_mount_opt, ERRORS_CONT); + else + set_opt(sbi->s_mount_opt, ERRORS_RO); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); @@ -1882,19 +1876,24 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. */ - root = iget(sb, EXT3_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { + root = ext3_iget(sb, EXT3_ROOT_INO); + if (IS_ERR(root)) { printk(KERN_ERR "EXT3-fs: get root inode failed\n"); - iput(root); + ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); goto failed_mount4; } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT3-fs: get root dentry failed\n"); + iput(root); + ret = -ENOMEM; + goto failed_mount4; + } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); /* @@ -1946,7 +1945,7 @@ out_fail: sb->s_fs_info = NULL; kfree(sbi); lock_kernel(); - return -EINVAL; + return ret; } /* @@ -1982,8 +1981,8 @@ static journal_t *ext3_get_journal(struct super_block *sb, * things happen if we iget() an unused inode, as the subsequent * iput() will try to delete it. 
*/ - journal_inode = iget(sb, journal_inum); - if (!journal_inode) { + journal_inode = ext3_iget(sb, journal_inum); + if (IS_ERR(journal_inode)) { printk(KERN_ERR "EXT3-fs: no journal found.\n"); return NULL; } @@ -1996,7 +1995,7 @@ static journal_t *ext3_get_journal(struct super_block *sb, jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); - if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); iput(journal_inode); return NULL; @@ -2759,16 +2758,16 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) { - path_release(&nd); + if (nd.path.mnt->mnt_sb != sb) { + path_put(&nd.path); return -EXDEV; } /* Quotafile not of fs root? */ - if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) printk(KERN_WARNING "EXT3-fs: Quota file not on filesystem root. 
" "Journalled quota will not work.\n"); - path_release(&nd); + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path); } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 408373819e34..fb89c299bece 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -492,8 +492,7 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, get_bh(bh); ext3_forget(handle, 1, inode, bh, bh->b_blocknr); } else { - BHDR(bh)->h_refcount = cpu_to_le32( - le32_to_cpu(BHDR(bh)->h_refcount) - 1); + le32_add_cpu(&BHDR(bh)->h_refcount, -1); error = ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; @@ -780,8 +779,7 @@ inserted: if (error) goto cleanup_dquot; lock_buffer(new_bh); - BHDR(new_bh)->h_refcount = cpu_to_le32(1 + - le32_to_cpu(BHDR(new_bh)->h_refcount)); + le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ac75ea953d83..0737e05ba3dd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1700,7 +1700,7 @@ retry_alloc: /* * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. + * group_no and gdp correctly point to the last group visited. */ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -2011,11 +2011,7 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, ext4_group_t group) { - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) - return 0; - return EXT4_SB(sb)->s_gdb_count; + return ext4_bg_has_super(sb, group) ? 
EXT4_SB(sb)->s_gdb_count : 0; } /** diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 575b5215c808..da18a74b966a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -782,14 +782,15 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); ext4_group_t block_group; int bit; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; struct inode *inode = NULL; + long err = -EIO; /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext4_warning(sb, __FUNCTION__, "bad orphan ino %lu! e2fsck was run?", ino); - goto out; + goto error; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -798,38 +799,49 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) if (!bitmap_bh) { ext4_warning(sb, __FUNCTION__, "inode bitmap error for orphan %lu", ino); - goto out; + goto error; } /* Having the inode bit set should be a 100% indicator that this * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. */ - if (!ext4_test_bit(bit, bitmap_bh->b_data) || - !(inode = iget(sb, ino)) || is_bad_inode(inode) || - NEXT_ORPHAN(inode) > max_ino) { - ext4_warning(sb, __FUNCTION__, - "bad orphan inode %lu! 
e2fsck was run?", ino); - printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } + if (!ext4_test_bit(bit, bitmap_bh->b_data)) + goto bad_orphan; + + inode = ext4_iget(sb, ino); + if (IS_ERR(inode)) + goto iget_failed; + + if (NEXT_ORPHAN(inode) > max_ino) + goto bad_orphan; + brelse(bitmap_bh); + return inode; + +iget_failed: + err = PTR_ERR(inode); + inode = NULL; +bad_orphan: + ext4_warning(sb, __FUNCTION__, + "bad orphan inode %lu! e2fsck was run?", ino); + printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext4_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) + if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); - inode = NULL; } -out: brelse(bitmap_bh); - return inode; +error: + return ERR_PTR(err); } unsigned long ext4_count_free_inodes (struct super_block * sb) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 05c4145dd27d..7dd9b50d5ebc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -429,16 +429,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * ext4_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain - * @goal: place to store the result. 
* * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. + * returns it. */ - static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect chain[4], Indirect *partial) + Indirect *partial) { struct ext4_block_alloc_info *block_i; @@ -839,7 +836,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext4_init_block_alloc_info(inode); - goal = ext4_find_goal(inode, iblock, chain, partial); + goal = ext4_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -895,7 +892,16 @@ out: return err; } -#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 +/* + * Number of credits we need for writing DIO_MAX_BLOCKS: + * We need sb + group descriptor + bitmap + inode -> 4 + * For B blocks with A block pointers per block we need: + * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). + * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. + */ +#define DIO_CREDITS 25 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, @@ -942,49 +948,31 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext4_journal_current_handle(); - int ret = 0; + int ret = 0, started = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - if (!create) - goto get_block; /* A read */ - - if (max_blocks == 1) - goto get_block; /* A single block get */ - - if (handle->h_transaction->t_state == T_LOCKED) { - /* - * Huge direct-io writes can hold off commits for long - * periods of time. Let this commit run. 
- */ - ext4_journal_stop(handle); - handle = ext4_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) + if (create && !handle) { + /* Direct IO write... */ + if (max_blocks > DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + handle = ext4_journal_start(inode, DIO_CREDITS + + 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto get_block; - } - - if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { - /* - * Getting low on buffer credits... - */ - ret = ext4_journal_extend(handle, DIO_CREDITS); - if (ret > 0) { - /* - * Couldn't extend the transaction. Start a new one. - */ - ret = ext4_journal_restart(handle, DIO_CREDITS); + goto out; } + started = 1; } -get_block: - if (ret == 0) { - ret = ext4_get_blocks_wrap(handle, inode, iblock, + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, bh_result, create, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; } + if (started) + ext4_journal_stop(handle); +out: return ret; } @@ -1674,7 +1662,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. 
*/ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, @@ -1683,7 +1672,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle = NULL; + handle_t *handle; ssize_t ret; int orphan = 0; size_t count = iov_length(iov, nr_segs); @@ -1691,17 +1680,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { loff_t final_size = offset + count; - handle = ext4_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } ret = ext4_orphan_add(handle, inode); - if (ret) - goto out_stop; + if (ret) { + ext4_journal_stop(handle); + goto out; + } orphan = 1; ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); } } @@ -1709,18 +1702,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, ext4_get_block, NULL); - /* - * Reacquire the handle: ext4_get_block() can restart the transaction - */ - handle = ext4_journal_current_handle(); - -out_stop: - if (handle) { + if (orphan) { int err; - if (orphan && inode->i_nlink) + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... 
*/ + ret = PTR_ERR(handle); + goto out; + } + if (inode->i_nlink) ext4_orphan_del(handle, inode); - if (orphan && ret > 0) { + if (ret > 0) { loff_t end = offset + ret; if (end > inode->i_size) { ei->i_disksize = end; @@ -2683,21 +2679,31 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, } } -void ext4_read_inode(struct inode * inode) +struct inode *ext4_iget(struct super_block *sb, unsigned long ino) { struct ext4_iloc iloc; struct ext4_inode *raw_inode; - struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_inode_info *ei; struct buffer_head *bh; + struct inode *inode; + long ret; int block; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT4_I(inode); #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (__ext4_get_inode_loc(inode, &iloc, 0)) + ret = __ext4_get_inode_loc(inode, &iloc, 0); + if (ret < 0) goto bad_inode; bh = iloc.bh; raw_inode = ext4_raw_inode(&iloc); @@ -2723,6 +2729,7 @@ void ext4_read_inode(struct inode * inode) !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } /* The only unlinked inodes we let through here have @@ -2750,17 +2757,12 @@ void ext4_read_inode(struct inode * inode) ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); - if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && - EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - /* - * When mke2fs creates big inodes it does not zero out - * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, - * so ignore those first few inodes. 
- */ + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { brelse (bh); + ret = -EIO; goto bad_inode; } if (ei->i_extra_isize == 0) { @@ -2814,11 +2816,12 @@ void ext4_read_inode(struct inode * inode) } brelse (iloc.bh); ext4_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } static int ext4_inode_blocks_set(handle_t *handle, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 76e5fedc0a0b..dd0fcfcb35ce 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -420,6 +420,7 @@ #define MB_DEFAULT_GROUP_PREALLOC 512 static struct kmem_cache *ext4_pspace_cachep; +static struct kmem_cache *ext4_ac_cachep; #ifdef EXT4_BB_MAX_BLOCKS #undef EXT4_BB_MAX_BLOCKS @@ -680,7 +681,6 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) { char *bb; - /* FIXME!! is this needed */ BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); BUG_ON(max == NULL); @@ -964,7 +964,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, grp->bb_fragments = fragments; if (free != grp->bb_free) { - printk(KERN_DEBUG + ext4_error(sb, __FUNCTION__, "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", group, free, grp->bb_free); grp->bb_free = free; @@ -1821,13 +1821,24 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, i = ext4_find_next_zero_bit(bitmap, EXT4_BLOCKS_PER_GROUP(sb), i); if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { - BUG_ON(free != 0); + /* + * IF we corrupt the bitmap we won't find any + * free blocks even though group info says we + * we have free blocks + */ + ext4_error(sb, __FUNCTION__, "%d free blocks as per " + "group info. 
But bitmap says 0\n", + free); break; } mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); BUG_ON(ex.fe_len <= 0); - BUG_ON(free < ex.fe_len); + if (free < ex.fe_len) { + ext4_error(sb, __FUNCTION__, "%d free blocks as per " + "group info. But got %d blocks\n", + free, ex.fe_len); + } ext4_mb_measure_extent(ac, &ex, e4b); @@ -2959,12 +2970,19 @@ int __init init_ext4_mballoc(void) if (ext4_pspace_cachep == NULL) return -ENOMEM; + ext4_ac_cachep = + kmem_cache_create("ext4_alloc_context", + sizeof(struct ext4_allocation_context), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + if (ext4_ac_cachep == NULL) { + kmem_cache_destroy(ext4_pspace_cachep); + return -ENOMEM; + } #ifdef CONFIG_PROC_FS proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); if (proc_root_ext4 == NULL) printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); #endif - return 0; } @@ -2972,6 +2990,7 @@ void exit_ext4_mballoc(void) { /* XXX: synchronize_rcu(); */ kmem_cache_destroy(ext4_pspace_cachep); + kmem_cache_destroy(ext4_ac_cachep); #ifdef CONFIG_PROC_FS remove_proc_entry(EXT4_ROOT, proc_root_fs); #endif @@ -3069,7 +3088,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, out_err: sb->s_dirt = 1; - put_bh(bitmap_bh); + brelse(bitmap_bh); return err; } @@ -3354,13 +3373,10 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, ac->ac_pa = pa; /* we don't correct pa_pstart or pa_plen here to avoid - * possible race when tte group is being loaded concurrently + * possible race when the group is being loaded concurrently * instead we correct pa later, after blocks are marked - * in on-disk bitmap -- see ext4_mb_release_context() */ - /* - * FIXME!! but the other CPUs can look at this particular - * pa and think that it have enought free blocks if we - * don't update pa_free here right ? 
+ * in on-disk bitmap -- see ext4_mb_release_context() + * Other CPUs are prevented from allocating from this pa by lg_mutex */ mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); } @@ -3699,7 +3715,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long end; @@ -3715,9 +3731,13 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; - ac.ac_sb = sb; - ac.ac_inode = pa->pa_inode; - ac.ac_op = EXT4_MB_HISTORY_DISCARD; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = pa->pa_inode; + ac->ac_op = EXT4_MB_HISTORY_DISCARD; + } while (bit < end) { bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); @@ -3733,24 +3753,28 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, (unsigned) group); free += next - bit; - ac.ac_b_ex.fe_group = group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = next - bit; - ac.ac_b_ex.fe_logical = 0; - ext4_mb_store_history(&ac); + if (ac) { + ac->ac_b_ex.fe_group = group; + ac->ac_b_ex.fe_start = bit; + ac->ac_b_ex.fe_len = next - bit; + ac->ac_b_ex.fe_logical = 0; + ext4_mb_store_history(ac); + } mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } if (free != pa->pa_free) { - printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n", + printk(KERN_CRIT "pa %p: logic %lu, phys. 
%lu, len %lu\n", pa, (unsigned long) pa->pa_lstart, (unsigned long) pa->pa_pstart, (unsigned long) pa->pa_len); - printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free); + ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", + free, pa->pa_free); } - BUG_ON(free != pa->pa_free); atomic_add(free, &sbi->s_mb_discarded); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); return err; } @@ -3758,12 +3782,15 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; - ac.ac_op = EXT4_MB_HISTORY_DISCARD; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + + if (ac) + ac->ac_op = EXT4_MB_HISTORY_DISCARD; BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); @@ -3771,13 +3798,16 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); - ac.ac_sb = sb; - ac.ac_inode = NULL; - ac.ac_b_ex.fe_group = group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = pa->pa_len; - ac.ac_b_ex.fe_logical = 0; - ext4_mb_store_history(&ac); + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = NULL; + ac->ac_b_ex.fe_group = group; + ac->ac_b_ex.fe_start = bit; + ac->ac_b_ex.fe_len = pa->pa_len; + ac->ac_b_ex.fe_logical = 0; + ext4_mb_store_history(ac); + kmem_cache_free(ext4_ac_cachep, ac); + } return 0; } @@ -4231,7 +4261,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, struct ext4_allocation_request *ar, int *errp) { - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac = NULL; struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block = 0; @@ -4257,53 +4287,60 @@ ext4_fsblk_t 
ext4_mb_new_blocks(handle_t *handle, } inquota = ar->len; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (!ac) { + *errp = -ENOMEM; + return 0; + } + ext4_mb_poll_new_transaction(sb, handle); - *errp = ext4_mb_initialize_context(&ac, ar); + *errp = ext4_mb_initialize_context(ac, ar); if (*errp) { ar->len = 0; goto out; } - ac.ac_op = EXT4_MB_HISTORY_PREALLOC; - if (!ext4_mb_use_preallocated(&ac)) { + ac->ac_op = EXT4_MB_HISTORY_PREALLOC; + if (!ext4_mb_use_preallocated(ac)) { - ac.ac_op = EXT4_MB_HISTORY_ALLOC; - ext4_mb_normalize_request(&ac, ar); + ac->ac_op = EXT4_MB_HISTORY_ALLOC; + ext4_mb_normalize_request(ac, ar); repeat: /* allocate space in core */ - ext4_mb_regular_allocator(&ac); + ext4_mb_regular_allocator(ac); /* as we've just preallocated more space than * user requested orinally, we store allocated * space in a special descriptor */ - if (ac.ac_status == AC_STATUS_FOUND && - ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) - ext4_mb_new_preallocation(&ac); + if (ac->ac_status == AC_STATUS_FOUND && + ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) + ext4_mb_new_preallocation(ac); } - if (likely(ac.ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(&ac, handle); + if (likely(ac->ac_status == AC_STATUS_FOUND)) { + ext4_mb_mark_diskspace_used(ac, handle); *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex); - ar->len = ac.ac_b_ex.fe_len; + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); + ar->len = ac->ac_b_ex.fe_len; } else { - freed = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); + freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) goto repeat; *errp = -ENOSPC; - ac.ac_b_ex.fe_len = 0; + ac->ac_b_ex.fe_len = 0; ar->len = 0; - ext4_mb_show_ac(&ac); + ext4_mb_show_ac(ac); } - ext4_mb_release_context(&ac); + ext4_mb_release_context(ac); out: if (ar->len < inquota) DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); + kmem_cache_free(ext4_ac_cachep, ac); return block; } static void 
ext4_mb_poll_new_transaction(struct super_block *sb, @@ -4405,9 +4442,9 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, unsigned long block, unsigned long count, int metadata, unsigned long *freed) { - struct buffer_head *bitmap_bh = 0; + struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac = NULL; struct ext4_group_desc *gdp; struct ext4_super_block *es; unsigned long overflow; @@ -4436,9 +4473,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, ext4_debug("freeing block %lu\n", block); - ac.ac_op = EXT4_MB_HISTORY_FREE; - ac.ac_inode = inode; - ac.ac_sb = sb; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) { + ac->ac_op = EXT4_MB_HISTORY_FREE; + ac->ac_inode = inode; + ac->ac_sb = sb; + } do_more: overflow = 0; @@ -4504,10 +4544,12 @@ do_more: BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); err = ext4_journal_dirty_metadata(handle, bitmap_bh); - ac.ac_b_ex.fe_group = block_group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = count; - ext4_mb_store_history(&ac); + if (ac) { + ac->ac_b_ex.fe_group = block_group; + ac->ac_b_ex.fe_start = bit; + ac->ac_b_ex.fe_len = count; + ext4_mb_store_history(ac); + } if (metadata) { /* blocks being freed are metadata. 
these blocks shouldn't @@ -4548,5 +4590,7 @@ do_more: error_return: brelse(bitmap_bh); ext4_std_error(sb, err); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); return; } diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 3ebc2332f52e..8c6c685b9d22 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode, retval = ext4_journal_restart(handle, needed); if (retval) goto err_out; - } - if (needed) { + } else if (needed) { retval = ext4_journal_extend(handle, needed); - if (retval != 0) { + if (retval) { /* * IF not able to extend the journal restart the journal */ @@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, } +static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) +{ + int retval = 0, needed; + + if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) + return 0; + /* + * We are freeing a blocks. During this we touch + * superblock, group descriptor and block bitmap. + * So allocate a credit of 3. We may update + * quota (user and group). 
+ */ + needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + + if (ext4_journal_extend(handle, needed) != 0) + retval = ext4_journal_restart(handle, needed); + + return retval; +} + static int free_dind_blocks(handle_t *handle, struct inode *inode, __le32 i_data) { @@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle, tmp_idata = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { - if (tmp_idata[i]) + if (tmp_idata[i]) { + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, le32_to_cpu(tmp_idata[i]), 1, 1); + } } put_bh(bh); + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); return 0; } @@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle, } } put_bh(bh); + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); return 0; } -static int free_ind_block(handle_t *handle, struct inode *inode) +static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) { int retval; - struct ext4_inode_info *ei = EXT4_I(inode); - if (ei->i_data[EXT4_IND_BLOCK]) + /* ei->i_data[EXT4_IND_BLOCK] */ + if (i_data[0]) { + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, - le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); + le32_to_cpu(i_data[0]), 1, 1); + } - if (ei->i_data[EXT4_DIND_BLOCK]) { - retval = free_dind_blocks(handle, inode, - ei->i_data[EXT4_DIND_BLOCK]); + /* ei->i_data[EXT4_DIND_BLOCK] */ + if (i_data[1]) { + retval = free_dind_blocks(handle, inode, i_data[1]); if (retval) return retval; } - if (ei->i_data[EXT4_TIND_BLOCK]) { - retval = free_tind_blocks(handle, inode, - ei->i_data[EXT4_TIND_BLOCK]); + /* ei->i_data[EXT4_TIND_BLOCK] */ + if (i_data[2]) { + retval = free_tind_blocks(handle, inode, i_data[2]); if (retval) return retval; } @@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode) } static int ext4_ext_swap_inode_data(handle_t *handle, struct 
inode *inode, - struct inode *tmp_inode, int retval) + struct inode *tmp_inode) { + int retval; + __le32 i_data[3]; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); - retval = free_ind_block(handle, inode); - if (retval) - goto err_out; - /* * One credit accounted for writing the * i_data field of the original inode @@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, goto err_out; } + i_data[0] = ei->i_data[EXT4_IND_BLOCK]; + i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; + i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; + + down_write(&EXT4_I(inode)->i_data_sem); /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, spin_lock(&inode->i_lock); inode->i_blocks += tmp_inode->i_blocks; spin_unlock(&inode->i_lock); + up_write(&EXT4_I(inode)->i_data_sem); + /* + * We mark the inode dirty after, because we decrement the + * i_blocks when freeing the indirect meta-data blocks + */ + retval = free_ind_block(handle, inode, i_data); ext4_mark_inode_dirty(handle, inode); + err_out: return retval; } @@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, } } put_bh(bh); + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, block, 1, 1); return retval; } @@ -414,7 +450,12 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) return -EINVAL; - down_write(&EXT4_I(inode)->i_data_sem); + if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) + /* + * don't migrate fast symlink + */ + return retval; + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -448,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); - ei = EXT4_I(inode); - i_data = 
ei->i_data; - memset(&lb, 0, sizeof(lb)); - - /* 32 bit block address 4 bytes */ - max_entries = inode->i_sb->s_blocksize >> 2; - /* * start with one credit accounted for * superblock modification. @@ -463,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * trascation that created the inode. Later as and * when we add extents we extent the journal */ + /* + * inode_mutex prevent write and truncate on the file. Read still goes + * through. We take i_data_sem in ext4_ext_swap_inode_data before we + * switch the inode format to prevent read. + */ + mutex_lock(&(inode->i_mutex)); handle = ext4_journal_start(inode, 1); + + ei = EXT4_I(inode); + i_data = ei->i_data; + memset(&lb, 0, sizeof(lb)); + + /* 32 bit block address 4 bytes */ + max_entries = inode->i_sb->s_blocksize >> 2; for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { if (i_data[i]) { retval = update_extent_range(handle, tmp_inode, @@ -501,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, */ retval = finish_range(handle, tmp_inode, &lb); err_out: - /* - * We are either freeing extent information or indirect - * blocks. During this we touch superblock, group descriptor - * and block bitmap. Later we mark the tmp_inode dirty - * via ext4_ext_tree_init. So allocate a credit of 4 - * We may update quota (user and group). - * - * FIXME!! we may be touching bitmaps in different block groups. - */ - if (ext4_journal_extend(handle, - 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) - ext4_journal_restart(handle, - 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); if (retval) /* * Failure case delete the extent information with the @@ -522,7 +556,11 @@ err_out: free_ext_block(handle, tmp_inode); else retval = ext4_ext_swap_inode_data(handle, inode, - tmp_inode, retval); + tmp_inode); + + /* We mark the tmp_inode dirty via ext4_ext_tree_init. 
*/ + if (ext4_journal_extend(handle, 1) != 0) + ext4_journal_restart(handle, 1); /* * Mark the tmp_inode as of size zero @@ -550,8 +588,7 @@ err_out: tmp_inode->i_nlink = 0; ext4_journal_stop(handle); - - up_write(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&(inode->i_mutex)); if (tmp_inode) iput(tmp_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67b6d8a1ceff..a9347fb43bcc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1039,17 +1039,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str if (!ext4_valid_inum(dir->i_sb, ino)) { ext4_error(dir->i_sb, "ext4_lookup", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext4_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -1078,18 +1072,13 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { ext4_error(child->d_inode->i_sb, "ext4_get_parent", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext4_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + parent = d_alloc_anon(inode); if (!parent) { iput(inode); @@ -2234,6 +2223,7 @@ retry: inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; + EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; } EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_add_nondir(handle, dentry, inode); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4fbba60816f4..9477a2bd6ff2 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -779,12 +779,11 @@ 
int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = iget(sb, EXT4_RESIZE_INO); - if (!inode || is_bad_inode(inode)) { + inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(inode)) { ext4_warning(sb, __FUNCTION__, "Error opening resize inode"); - iput(inode); - return -ENOENT; + return PTR_ERR(inode); } } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 055a0cd0168e..13383ba18f1d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -777,11 +777,10 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext4_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -850,7 +849,6 @@ static struct quotactl_ops ext4_qctl_operations = { static const struct super_operations ext4_sops = { .alloc_inode = ext4_alloc_inode, .destroy_inode = ext4_destroy_inode, - .read_inode = ext4_read_inode, .write_inode = ext4_write_inode, .dirty_inode = ext4_dirty_inode, .delete_inode = ext4_delete_inode, @@ -1458,7 +1456,7 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, } /* Called at mount-time, super-block is locked */ -static int ext4_check_descriptors (struct super_block * sb) +static int ext4_check_descriptors(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -1466,8 +1464,6 @@ static int ext4_check_descriptors (struct super_block * sb) ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; - struct ext4_group_desc * gdp = NULL; - int desc_block = 0; int 
flexbg_flag = 0; ext4_group_t i; @@ -1476,17 +1472,15 @@ static int ext4_check_descriptors (struct super_block * sb) ext4_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1 || flexbg_flag) last_block = ext4_blocks_count(sbi->s_es) - 1; else last_block = first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext4_group_desc *) - sbi->s_group_desc[desc_block++]->b_data; block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { @@ -1524,8 +1518,6 @@ static int ext4_check_descriptors (struct super_block * sb) } if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); - gdp = (struct ext4_group_desc *) - ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); @@ -1811,6 +1803,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; + int ret = -EINVAL; int blocksize; int db_count; int i; @@ -1926,6 +1919,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) printk(KERN_WARNING "EXT4-fs warning: feature flags set on rev 0 fs, " "running e2fsck is recommended\n"); + + /* + * Since ext4 is still considered development code, we require + * that the TEST_FILESYS flag in s->flags be set. 
+ */ + if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { + printk(KERN_WARNING "EXT4-fs: %s: not marked " + "OK to use with test code.\n", sb->s_id); + goto failed_mount; + } + /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -2243,19 +2247,24 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. */ - root = iget(sb, EXT4_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { + root = ext4_iget(sb, EXT4_ROOT_INO); + if (IS_ERR(root)) { printk(KERN_ERR "EXT4-fs: get root inode failed\n"); - iput(root); + ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); goto failed_mount4; } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); + iput(root); + ret = -ENOMEM; + goto failed_mount4; + } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -2336,7 +2345,7 @@ out_fail: sb->s_fs_info = NULL; kfree(sbi); lock_kernel(); - return -EINVAL; + return ret; } /* @@ -2372,8 +2381,8 @@ static journal_t *ext4_get_journal(struct super_block *sb, * things happen if we iget() an unused inode, as the subsequent * iput() will try to delete it. 
*/ - journal_inode = iget(sb, journal_inum); - if (!journal_inode) { + journal_inode = ext4_iget(sb, journal_inum); + if (IS_ERR(journal_inode)) { printk(KERN_ERR "EXT4-fs: no journal found.\n"); return NULL; } @@ -2386,7 +2395,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); - if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); iput(journal_inode); return NULL; @@ -3149,16 +3158,16 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) { - path_release(&nd); + if (nd.path.mnt->mnt_sb != sb) { + path_put(&nd.path); return -EXDEV; } /* Quotafile not of fs root? */ - if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) printk(KERN_WARNING "EXT4-fs: Quota file not on filesystem root. " "Journalled quota will not work.\n"); - path_release(&nd); + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path); } diff --git a/fs/fat/file.c b/fs/fat/file.c index 69a83b59dce8..c614175876e0 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -155,6 +155,42 @@ out: return err; } +static int check_mode(const struct msdos_sb_info *sbi, mode_t mode) +{ + mode_t req = mode & ~S_IFMT; + + /* + * Of the r and x bits, all (subject to umask) must be present. Of the + * w bits, either all (subject to umask) or none must be present. 
+ */ + + if (S_ISREG(mode)) { + req &= ~sbi->options.fs_fmask; + + if ((req & (S_IRUGO | S_IXUGO)) != + ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_fmask)) + return -EPERM; + + if ((req & S_IWUGO) != 0 && + (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_fmask)) + return -EPERM; + } else if (S_ISDIR(mode)) { + req &= ~sbi->options.fs_dmask; + + if ((req & (S_IRUGO | S_IXUGO)) != + ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_dmask)) + return -EPERM; + + if ((req & S_IWUGO) != 0 && + (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_dmask)) + return -EPERM; + } else { + return -EPERM; + } + + return 0; +} + int fat_notify_change(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -186,9 +222,7 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && - (attr->ia_gid != sbi->options.fs_gid)) || - ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_gid != sbi->options.fs_gid))) error = -EPERM; if (error) { @@ -196,6 +230,13 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) error = 0; goto out; } + + if (attr->ia_valid & ATTR_MODE) { + error = check_mode(sbi, attr->ia_mode); + if (error != 0 && !sbi->options.quiet) + goto out; + } + error = inode_setattr(inode, attr); if (error) goto out; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 920a576e1c25..53f3cf62b7c1 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -634,8 +634,6 @@ static const struct super_operations fat_sops = { .clear_inode = fat_clear_inode, .remount_fs = fat_remount, - .read_inode = make_bad_inode, - .show_options = fat_show_options, }; @@ -663,8 +661,8 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, if (fh_len < 5 || fh_type != 3) return NULL; - inode = iget(sb, fh[0]); - if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { + inode = ilookup(sb, fh[0]); + if 
(!inode || inode->i_generation != fh[1]) { if (inode) iput(inode); inode = NULL; @@ -760,7 +758,7 @@ static struct dentry *fat_get_parent(struct dentry *child) inode = fat_build_inode(child->d_sb, de, i_pos); brelse(bh); if (IS_ERR(inode)) { - parent = ERR_PTR(PTR_ERR(inode)); + parent = ERR_CAST(inode); goto out; } parent = d_alloc_anon(inode); @@ -839,6 +837,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) if (!opts->numtail) seq_puts(m, ",nonumtail"); } + if (sbi->options.flush) + seq_puts(m, ",flush"); return 0; } @@ -1295,10 +1295,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; if (!IS_FSINFO(fsinfo)) { - printk(KERN_WARNING - "FAT: Did not find valid FSINFO signature.\n" - " Found signature1 0x%08x signature2 0x%08x" - " (sector = %lu)\n", + printk(KERN_WARNING "FAT: Invalid FSINFO signature: " + "0x%08x, 0x%08x (sector = %lu)\n", le32_to_cpu(fsinfo->signature1), le32_to_cpu(fsinfo->signature2), sbi->fsinfo_sector); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 308f2b6b5026..61f23511eacf 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -55,9 +55,8 @@ void fat_clusters_flush(struct super_block *sb) fsinfo = (struct fat_boot_fsinfo *)bh->b_data; /* Sanity check */ if (!IS_FSINFO(fsinfo)) { - printk(KERN_ERR "FAT: Did not find valid FSINFO signature.\n" - " Found signature1 0x%08x signature2 0x%08x" - " (sector = %lu)\n", + printk(KERN_ERR "FAT: Invalid FSINFO signature: " + "0x%08x, 0x%08x (sector = %lu)\n", le32_to_cpu(fsinfo->signature1), le32_to_cpu(fsinfo->signature2), sbi->fsinfo_sector); diff --git a/fs/fcntl.c b/fs/fcntl.c index 8685263ccc4a..e632da761fc1 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -24,7 +24,7 @@ #include <asm/siginfo.h> #include <asm/uaccess.h> -void fastcall set_close_on_exec(unsigned int fd, int flag) +void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; struct fdtable *fdt; @@ -309,7 
+309,7 @@ pid_t f_getown(struct file *filp) { pid_t pid; read_lock(&filp->f_owner.lock); - pid = pid_nr_ns(filp->f_owner.pid, current->nsproxy->pid_ns); + pid = pid_vnr(filp->f_owner.pid); if (filp->f_owner.pid_type == PIDTYPE_PGID) pid = -pid; read_unlock(&filp->f_owner.lock); diff --git a/fs/file.c b/fs/file.c index c5575de01113..5110acb1c9ef 100644 --- a/fs/file.c +++ b/fs/file.c @@ -24,6 +24,8 @@ struct fdtable_defer { struct fdtable *next; }; +int sysctl_nr_open __read_mostly = 1024*1024; + /* * We use this list to defer free fdtables that have vmalloced * sets/arrays. By keeping a per-cpu list, we avoid having to embed @@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > NR_OPEN) - nr = NR_OPEN; + if (nr > sysctl_nr_open) + nr = sysctl_nr_open; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr) if (nr < fdt->max_fds) return 0; /* Can we expand? */ - if (nr >= NR_OPEN) + if (nr >= sysctl_nr_open) return -EMFILE; /* All good, so we try */ diff --git a/fs/file_table.c b/fs/file_table.c index 664e3f2309b8..6d27befe2d48 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -197,7 +197,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, } EXPORT_SYMBOL(init_file); -void fastcall fput(struct file *file) +void fput(struct file *file) { if (atomic_dec_and_test(&file->f_count)) __fput(file); @@ -208,7 +208,7 @@ EXPORT_SYMBOL(fput); /* __fput is called from task context when aio completion releases the last * last use of a struct file *. Do not use otherwise. 
*/ -void fastcall __fput(struct file *file) +void __fput(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; @@ -241,7 +241,7 @@ void fastcall __fput(struct file *file) mntput(mnt); } -struct file fastcall *fget(unsigned int fd) +struct file *fget(unsigned int fd) { struct file *file; struct files_struct *files = current->files; @@ -269,7 +269,7 @@ EXPORT_SYMBOL(fget); * and a flag is returned to be passed to the corresponding fput_light(). * There must not be a cloning between an fget_light/fput_light pair. */ -struct file fastcall *fget_light(unsigned int fd, int *fput_needed) +struct file *fget_light(unsigned int fd, int *fput_needed) { struct file *file; struct files_struct *files = current->files; diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 91ccee8723f7..2b46064f66b2 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -58,7 +58,7 @@ extern struct inode * vxfs_get_fake_inode(struct super_block *, extern void vxfs_put_fake_inode(struct inode *); extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); -extern void vxfs_read_inode(struct inode *); +extern struct inode * vxfs_iget(struct super_block *, ino_t); extern void vxfs_clear_inode(struct inode *); /* vxfs_lookup.c */ diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index d1f7c5b5b3c3..ad88d2364bc2 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -129,7 +129,7 @@ fail: * Description: * Search the for inode number @ino in the filesystem * described by @sbp. Use the specified inode table (@ilistp). - * Returns the matching VxFS inode on success, else a NULL pointer. + * Returns the matching VxFS inode on success, else an error code. 
*/ static struct vxfs_inode_info * __vxfs_iget(ino_t ino, struct inode *ilistp) @@ -157,12 +157,12 @@ __vxfs_iget(ino_t ino, struct inode *ilistp) } printk(KERN_WARNING "vxfs: error on page %p\n", pp); - return NULL; + return ERR_CAST(pp); fail: printk(KERN_WARNING "vxfs: unable to read inode %ld\n", (unsigned long)ino); vxfs_put_page(pp); - return NULL; + return ERR_PTR(-ENOMEM); } /** @@ -178,7 +178,10 @@ fail: struct vxfs_inode_info * vxfs_stiget(struct super_block *sbp, ino_t ino) { - return __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_stilist); + struct vxfs_inode_info *vip; + + vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_stilist); + return IS_ERR(vip) ? NULL : vip; } /** @@ -282,23 +285,32 @@ vxfs_put_fake_inode(struct inode *ip) } /** - * vxfs_read_inode - fill in inode information - * @ip: inode pointer to fill + * vxfs_iget - get an inode + * @sbp: the superblock to get the inode for + * @ino: the number of the inode to get * * Description: - * vxfs_read_inode reads the disk inode for @ip and fills - * in all relevant fields in @ip. + * vxfs_read_inode creates an inode, reads the disk inode for @ino and fills + * in all relevant fields in the new inode. 
*/ -void -vxfs_read_inode(struct inode *ip) +struct inode * +vxfs_iget(struct super_block *sbp, ino_t ino) { - struct super_block *sbp = ip->i_sb; struct vxfs_inode_info *vip; const struct address_space_operations *aops; - ino_t ino = ip->i_ino; - - if (!(vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist))) - return; + struct inode *ip; + + ip = iget_locked(sbp, ino); + if (!ip) + return ERR_PTR(-ENOMEM); + if (!(ip->i_state & I_NEW)) + return ip; + + vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist); + if (IS_ERR(vip)) { + iget_failed(ip); + return ERR_CAST(vip); + } vxfs_iinit(ip, vip); @@ -323,7 +335,8 @@ vxfs_read_inode(struct inode *ip) } else init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); - return; + unlock_new_inode(ip); + return ip; } /** diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index bf86e5444ea6..aee049cb9f84 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -213,10 +213,10 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) lock_kernel(); ino = vxfs_inode_by_name(dip, dp); if (ino) { - ip = iget(dip->i_sb, ino); - if (!ip) { + ip = vxfs_iget(dip->i_sb, ino); + if (IS_ERR(ip)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(ip); } } unlock_kernel(); diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 4f95572d2722..1dacda831577 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -60,7 +60,6 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *); static int vxfs_remount(struct super_block *, int *, char *); static const struct super_operations vxfs_super_ops = { - .read_inode = vxfs_read_inode, .clear_inode = vxfs_clear_inode, .put_super = vxfs_put_super, .statfs = vxfs_statfs, @@ -153,6 +152,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) struct buffer_head *bp = NULL; u_long bsize; struct inode *root; + int ret = -EINVAL; sbp->s_flags |= MS_RDONLY; @@ -219,7 +219,11 @@ static 
int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) } sbp->s_op = &vxfs_super_ops; - root = iget(sbp, VXFS_ROOT_INO); + root = vxfs_iget(sbp, VXFS_ROOT_INO); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } sbp->s_root = d_alloc_root(root); if (!sbp->s_root) { iput(root); @@ -236,7 +240,7 @@ out_free_ilist: out: brelse(bp); kfree(infp); - return -EINVAL; + return ret; } /* diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0b3064079fa5..c0076077d338 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -515,8 +515,7 @@ writeback_inodes(struct writeback_control *wbc) might_sleep(); spin_lock(&sb_lock); restart: - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { + list_for_each_entry_reverse(sb, &super_blocks, s_list) { if (sb_has_dirty_inodes(sb)) { /* we're making our own get_super here */ sb->s_count++; @@ -581,10 +580,8 @@ static void set_sb_syncing(int val) { struct super_block *sb; spin_lock(&sb_lock); - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { + list_for_each_entry_reverse(sb, &super_blocks, s_list) sb->s_syncing = val; - } spin_unlock(&sb_lock); } @@ -658,7 +655,7 @@ int write_inode_now(struct inode *inode, int sync) int ret; struct writeback_control wbc = { .nr_to_write = LONG_MAX, - .sync_mode = WB_SYNC_ALL, + .sync_mode = sync ? 
WB_SYNC_ALL : WB_SYNC_NONE, .range_start = 0, .range_end = LLONG_MAX, }; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index db534bcde45f..af639807524e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -201,6 +201,55 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) } } +static unsigned len_args(unsigned numargs, struct fuse_arg *args) +{ + unsigned nbytes = 0; + unsigned i; + + for (i = 0; i < numargs; i++) + nbytes += args[i].size; + + return nbytes; +} + +static u64 fuse_get_unique(struct fuse_conn *fc) +{ + fc->reqctr++; + /* zero is special */ + if (fc->reqctr == 0) + fc->reqctr = 1; + + return fc->reqctr; +} + +static void queue_request(struct fuse_conn *fc, struct fuse_req *req) +{ + req->in.h.unique = fuse_get_unique(fc); + req->in.h.len = sizeof(struct fuse_in_header) + + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); + list_add_tail(&req->list, &fc->pending); + req->state = FUSE_REQ_PENDING; + if (!req->waiting) { + req->waiting = 1; + atomic_inc(&fc->num_waiting); + } + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); +} + +static void flush_bg_queue(struct fuse_conn *fc) +{ + while (fc->active_background < FUSE_MAX_BACKGROUND && + !list_empty(&fc->bg_queue)) { + struct fuse_req *req; + + req = list_entry(fc->bg_queue.next, struct fuse_req, list); + list_del(&req->list); + fc->active_background++; + queue_request(fc, req); + } +} + /* * This function is called when a request is finished. 
Either a reply * has arrived or it was aborted (and not yet sent) or some error @@ -229,6 +278,8 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) clear_bdi_congested(&fc->bdi, WRITE); } fc->num_background--; + fc->active_background--; + flush_bg_queue(fc); } spin_unlock(&fc->lock); wake_up(&req->waitq); @@ -320,42 +371,6 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) } } -static unsigned len_args(unsigned numargs, struct fuse_arg *args) -{ - unsigned nbytes = 0; - unsigned i; - - for (i = 0; i < numargs; i++) - nbytes += args[i].size; - - return nbytes; -} - -static u64 fuse_get_unique(struct fuse_conn *fc) - { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - - return fc->reqctr; -} - -static void queue_request(struct fuse_conn *fc, struct fuse_req *req) -{ - req->in.h.unique = fuse_get_unique(fc); - req->in.h.len = sizeof(struct fuse_in_header) + - len_args(req->in.numargs, (struct fuse_arg *) req->in.args); - list_add_tail(&req->list, &fc->pending); - req->state = FUSE_REQ_PENDING; - if (!req->waiting) { - req->waiting = 1; - atomic_inc(&fc->num_waiting); - } - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); -} - void request_send(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; @@ -375,20 +390,26 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } +static void request_send_nowait_locked(struct fuse_conn *fc, + struct fuse_req *req) +{ + req->background = 1; + fc->num_background++; + if (fc->num_background == FUSE_MAX_BACKGROUND) + fc->blocked = 1; + if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { + set_bdi_congested(&fc->bdi, READ); + set_bdi_congested(&fc->bdi, WRITE); + } + list_add_tail(&req->list, &fc->bg_queue); + flush_bg_queue(fc); +} + static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) { spin_lock(&fc->lock); if (fc->connected) { - req->background = 1; - 
fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) - fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { - set_bdi_congested(&fc->bdi, READ); - set_bdi_congested(&fc->bdi, WRITE); - } - - queue_request(fc, req); + request_send_nowait_locked(fc, req); spin_unlock(&fc->lock); } else { req->out.h.error = -ENOTCONN; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 80d2f5292cf9..7fb514b6d852 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -269,12 +269,12 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, req = fuse_get_req(fc); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); forget_req = fuse_get_req(fc); if (IS_ERR(forget_req)) { fuse_put_request(fc, req); - return ERR_PTR(PTR_ERR(forget_req)); + return ERR_CAST(forget_req); } attr_version = fuse_get_attr_version(fc); @@ -416,6 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_put_request(fc, forget_req); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); + fuse_invalidate_attr(dir); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; @@ -1005,7 +1006,7 @@ static char *read_link(struct dentry *dentry) char *link; if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); link = (char *) __get_free_page(GFP_KERNEL); if (!link) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bb05d227cf30..676b0bc8a86d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -77,8 +77,8 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - dput(req->dentry); - mntput(req->vfsmount); + dput(req->misc.release.dentry); + mntput(req->misc.release.vfsmount); fuse_put_request(fc, req); } @@ -86,7 +86,8 @@ static void fuse_file_put(struct fuse_file *ff) { if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - 
struct fuse_conn *fc = get_fuse_conn(req->dentry->d_inode); + struct inode *inode = req->misc.release.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); req->end = fuse_release_end; request_send_background(fc, req); kfree(ff); @@ -137,7 +138,7 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) { struct fuse_req *req = ff->reserved_req; - struct fuse_release_in *inarg = &req->misc.release_in; + struct fuse_release_in *inarg = &req->misc.release.in; inarg->fh = ff->fh; inarg->flags = flags; @@ -153,13 +154,14 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) struct fuse_file *ff = file->private_data; if (ff) { struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = ff->reserved_req; fuse_release_fill(ff, get_node_id(inode), file->f_flags, isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); /* Hold vfsmount and dentry until release is finished */ - ff->reserved_req->vfsmount = mntget(file->f_path.mnt); - ff->reserved_req->dentry = dget(file->f_path.dentry); + req->misc.release.vfsmount = mntget(file->f_path.mnt); + req->misc.release.dentry = dget(file->f_path.dentry); spin_lock(&fc->lock); list_del(&ff->write_entry); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3ab8a3048e8b..67aaf6ee38ea 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -215,7 +215,11 @@ struct fuse_req { /** Data for asynchronous requests */ union { struct fuse_forget_in forget_in; - struct fuse_release_in release_in; + struct { + struct fuse_release_in in; + struct vfsmount *vfsmount; + struct dentry *dentry; + } release; struct fuse_init_in init_in; struct fuse_init_out init_out; struct fuse_read_in read_in; @@ -238,12 +242,6 @@ struct fuse_req { /** File used in the request (or NULL) */ struct fuse_file *ff; - /** vfsmount used in release */ - struct vfsmount *vfsmount; - - /** dentry used in release */ - struct dentry *dentry; - /** 
Request completion callback */ void (*end)(struct fuse_conn *, struct fuse_req *); @@ -298,6 +296,12 @@ struct fuse_conn { /** Number of requests currently in the background */ unsigned num_background; + /** Number of background requests currently queued for userspace */ + unsigned active_background; + + /** The list of background requests set aside for later queuing */ + struct list_head bg_queue; + /** Pending interrupts */ struct list_head interrupts; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e5e80d1a4687..033f7bdd47e8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -29,6 +29,8 @@ DEFINE_MUTEX(fuse_mutex); #define FUSE_SUPER_MAGIC 0x65735546 +#define FUSE_DEFAULT_BLKSIZE 512 + struct fuse_mount_data { int fd; unsigned rootmode; @@ -76,11 +78,6 @@ static void fuse_destroy_inode(struct inode *inode) kmem_cache_free(fuse_inode_cachep, inode); } -static void fuse_read_inode(struct inode *inode) -{ - /* No op */ -} - void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, unsigned long nodeid, u64 nlookup) { @@ -360,7 +357,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) char *p; memset(d, 0, sizeof(struct fuse_mount_data)); d->max_read = ~0; - d->blksize = 512; + d->blksize = FUSE_DEFAULT_BLKSIZE; while ((p = strsep(&opt, ",")) != NULL) { int token; @@ -445,6 +442,9 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",allow_other"); if (fc->max_read != ~0) seq_printf(m, ",max_read=%u", fc->max_read); + if (mnt->mnt_sb->s_bdev && + mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) + seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize); return 0; } @@ -465,6 +465,7 @@ static struct fuse_conn *new_conn(void) INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); INIT_LIST_HEAD(&fc->interrupts); + INIT_LIST_HEAD(&fc->bg_queue); atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; @@ 
-514,7 +515,6 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode) static const struct super_operations fuse_super_operations = { .alloc_inode = fuse_alloc_inode, .destroy_inode = fuse_destroy_inode, - .read_inode = fuse_read_inode, .clear_inode = fuse_clear_inode, .drop_inode = generic_delete_inode, .remount_fs = fuse_remount_fs, diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 57e2ed932adc..c34709512b19 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1498,7 +1498,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { if (IS_ERR(dent)) - return ERR_PTR(PTR_ERR(dent)); + return ERR_CAST(dent); inode = gfs2_inode_lookup(dir->i_sb, be16_to_cpu(dent->de_type), be64_to_cpu(dent->de_inum.no_addr), diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 80e09c50590a..7175a4d06435 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -334,7 +334,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_state = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_hash = hash; - gl->gl_owner_pid = 0; + gl->gl_owner_pid = NULL; gl->gl_ip = 0; gl->gl_ops = glops; gl->gl_req_gh = NULL; @@ -399,7 +399,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner_pid = current->pid; + gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; gh->gh_error = 0; @@ -433,6 +433,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * void gfs2_holder_uninit(struct gfs2_holder *gh) { + put_pid(gh->gh_owner_pid); gfs2_glock_put(gh->gh_gl); gh->gh_gl = NULL; gh->gh_ip = 0; @@ -631,7 +632,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) wait_on_holder(&gh); gfs2_holder_uninit(&gh); } else { - gl->gl_owner_pid = current->pid; + gl->gl_owner_pid 
= get_pid(task_pid(current)); gl->gl_ip = (unsigned long)__builtin_return_address(0); spin_unlock(&gl->gl_spin); } @@ -652,7 +653,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { acquired = 0; } else { - gl->gl_owner_pid = current->pid; + gl->gl_owner_pid = get_pid(task_pid(current)); gl->gl_ip = (unsigned long)__builtin_return_address(0); } spin_unlock(&gl->gl_spin); @@ -668,12 +669,17 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) static void gfs2_glmutex_unlock(struct gfs2_glock *gl) { + struct pid *pid; + spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); - gl->gl_owner_pid = 0; + pid = gl->gl_owner_pid; + gl->gl_owner_pid = NULL; gl->gl_ip = 0; run_queue(gl); spin_unlock(&gl->gl_spin); + + put_pid(pid); } /** @@ -1045,7 +1051,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) } static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, pid_t pid) +find_holder_by_owner(struct list_head *head, struct pid *pid) { struct gfs2_holder *gh; @@ -1082,7 +1088,7 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_holder *existing; - BUG_ON(!gh->gh_owner_pid); + BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); @@ -1092,12 +1098,14 @@ static void add_to_queue(struct gfs2_holder *gh) if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); + printk(KERN_INFO "pid : %d\n", + pid_nr(existing->gh_owner_pid)); printk(KERN_INFO "lock type : %d lock state : %d\n", existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); + printk(KERN_INFO "pid : %d\n", + pid_nr(gh->gh_owner_pid)); printk(KERN_INFO "lock type : %d lock state : %d\n", gl->gl_name.ln_type, gl->gl_state); BUG(); @@ -1798,8 +1806,9 @@ 
static int dump_holder(struct glock_iter *gi, char *str, print_dbg(gi, " %s\n", str); if (gh->gh_owner_pid) { - print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid); - gh_owner = find_task_by_pid(gh->gh_owner_pid); + print_dbg(gi, " owner = %ld ", + (long)pid_nr(gh->gh_owner_pid)); + gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); if (gh_owner) print_dbg(gi, "(%s)\n", gh_owner->comm); else @@ -1877,13 +1886,13 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); print_dbg(gi, " gl_state = %u\n", gl->gl_state); if (gl->gl_owner_pid) { - gl_owner = find_task_by_pid(gl->gl_owner_pid); + gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); if (gl_owner) print_dbg(gi, " gl_owner = pid %d (%s)\n", - gl->gl_owner_pid, gl_owner->comm); + pid_nr(gl->gl_owner_pid), gl_owner->comm); else print_dbg(gi, " gl_owner = %d (ended)\n", - gl->gl_owner_pid); + pid_nr(gl->gl_owner_pid)); } else print_dbg(gi, " gl_owner = -1\n"); print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index b16f604eea9f..2f9c6d136b37 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -36,11 +36,13 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { struct gfs2_holder *gh; int locked = 0; + struct pid *pid; /* Look in glock's list of holders for one with current task as owner */ spin_lock(&gl->gl_spin); + pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { - if (gh->gh_owner_pid == current->pid) { + if (gh->gh_owner_pid == pid) { locked = 1; break; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 513aaf0dc0ab..525dcae352d6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -151,7 +151,7 @@ struct gfs2_holder { struct list_head gh_list; struct gfs2_glock *gh_gl; - pid_t gh_owner_pid; + struct pid *gh_owner_pid; unsigned int gh_state; unsigned gh_flags; @@ -182,7 +182,7 @@ struct gfs2_glock { unsigned int gl_hash; unsigned int 
gl_demote_state; /* state requested by remote node */ unsigned long gl_demote_time; /* time of first demote request */ - pid_t gl_owner_pid; + struct pid *gl_owner_pid; unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 728d3169e7bd..37725ade3c51 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -240,7 +240,7 @@ fail_put: ip->i_gl->gl_object = NULL; gfs2_glock_put(ip->i_gl); fail: - iput(inode); + iget_failed(inode); return ERR_PTR(error); } diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b9da62348a87..334c7f85351b 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -143,7 +143,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) * have to return that as a(n invalid) pointer to dentry. */ if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); dentry = d_alloc_anon(inode); if (!dentry) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 43d511bba52d..4bee6aa845e4 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -884,12 +884,13 @@ static struct super_block* get_gfs2_sb(const char *dev_name) dev_name); goto out; } - error = vfs_getattr(nd.mnt, nd.dentry, &stat); + error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); fstype = get_fs_type("gfs2"); list_for_each_entry(s, &fstype->fs_supers, s_instances) { if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || - (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) { + (S_ISDIR(stat.mode) && + s == nd.path.dentry->d_inode->i_sb)) { sb = s; goto free_nd; } @@ -899,7 +900,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name) "mount point %s\n", dev_name); free_nd: - path_release(&nd); + path_put(&nd.path); out: return sb; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 9f71372c1757..e87412902bed 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -111,7 +111,7 @@ static struct dentry 
*gfs2_lookup(struct inode *dir, struct dentry *dentry, inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); if (inode && IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); if (inode) { struct gfs2_glock *gl = GFS2_I(inode)->i_gl; diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index f8452a0eab56..4129cdb3f0d8 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -52,9 +52,9 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) rec = (e + b) / 2; len = hfs_brec_lenoff(bnode, rec, &off); keylen = hfs_brec_keylen(bnode, rec); - if (keylen == HFS_BAD_KEYLEN) { + if (keylen == 0) { res = -EINVAL; - goto done; + goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); cmpval = bnode->tree->keycmp(fd->key, fd->search_key); @@ -71,9 +71,9 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) if (rec != e && e >= 0) { len = hfs_brec_lenoff(bnode, e, &off); keylen = hfs_brec_keylen(bnode, e); - if (keylen == HFS_BAD_KEYLEN) { + if (keylen == 0) { res = -EINVAL; - goto done; + goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); } @@ -83,6 +83,7 @@ done: fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; +fail: return res; } @@ -206,7 +207,7 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) len = hfs_brec_lenoff(bnode, fd->record, &off); keylen = hfs_brec_keylen(bnode, fd->record); - if (keylen == HFS_BAD_KEYLEN) { + if (keylen == 0) { res = -EINVAL; goto out; } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 8626ee375ea8..878bf25dbc6a 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -49,14 +49,14 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) if (retval > node->tree->max_key_len + 2) { printk(KERN_ERR "hfs: keylen %d too large\n", retval); - retval = HFS_BAD_KEYLEN; + retval = 0; } } else { retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; if (retval > node->tree->max_key_len + 1) { printk(KERN_ERR "hfs: keylen %d too large\n", retval); - retval = 
HFS_BAD_KEYLEN; + retval = 0; } } } diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 110dd3515dc8..24cf6fc43021 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -81,15 +81,23 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke goto fail_page; if (!tree->node_count) goto fail_page; - if ((id == HFS_EXT_CNID) && (tree->max_key_len != HFS_MAX_EXT_KEYLEN)) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", - tree->max_key_len); - goto fail_page; - } - if ((id == HFS_CAT_CNID) && (tree->max_key_len != HFS_MAX_CAT_KEYLEN)) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", - tree->max_key_len); - goto fail_page; + switch (id) { + case HFS_EXT_CNID: + if (tree->max_key_len != HFS_MAX_EXT_KEYLEN) { + printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", + tree->max_key_len); + goto fail_page; + } + break; + case HFS_CAT_CNID: + if (tree->max_key_len != HFS_MAX_CAT_KEYLEN) { + printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", + tree->max_key_len); + goto fail_page; + } + break; + default: + BUG(); } tree->node_size_shift = ffs(size) - 1; diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h index c6aae61adfe6..6f194d0768b6 100644 --- a/fs/hfs/hfs.h +++ b/fs/hfs/hfs.h @@ -28,8 +28,6 @@ #define HFS_MAX_NAMELEN 128 #define HFS_MAX_VALENCE 32767U -#define HFS_BAD_KEYLEN 0xFF - /* Meanings of the drAtrb field of the MDB, * Reference: _Inside Macintosh: Files_ p. 2-61 */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 16cbd902f8b9..32de44ed0021 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -6,7 +6,7 @@ * This file may be distributed under the terms of the GNU General Public License. * * This file contains hfs_read_super(), some of the super_ops and - * init_module() and cleanup_module(). The remaining super_ops are in + * init_hfs_fs() and exit_hfs_fs(). The remaining super_ops are in * inode.c since they deal with inodes. 
* * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 050d29c0a5b5..bb5433608a42 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -22,6 +22,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) struct hfs_btree *tree; struct hfs_btree_header_rec *head; struct address_space *mapping; + struct inode *inode; struct page *page; unsigned int size; @@ -33,9 +34,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) spin_lock_init(&tree->hash_lock); tree->sb = sb; tree->cnid = id; - tree->inode = iget(sb, id); - if (!tree->inode) + inode = hfsplus_iget(sb, id); + if (IS_ERR(inode)) goto free_tree; + tree->inode = inode; mapping = tree->inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 1955ee61251c..29683645fa0a 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -97,9 +97,9 @@ again: goto fail; } hfs_find_exit(&fd); - inode = iget(dir->i_sb, cnid); - if (!inode) - return ERR_PTR(-EACCES); + inode = hfsplus_iget(dir->i_sb, cnid); + if (IS_ERR(inode)) + return ERR_CAST(inode); if (S_ISREG(inode->i_mode)) HFSPLUS_I(inode).dev = linkid; out: diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index d9f5eda6d039..d72d0a8b25aa 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -345,6 +345,9 @@ int hfsplus_parse_options(char *, struct hfsplus_sb_info *); void hfsplus_fill_defaults(struct hfsplus_sb_info *); int hfsplus_show_options(struct seq_file *, struct vfsmount *); +/* super.c */ +struct inode *hfsplus_iget(struct super_block *, unsigned long); + /* tables.c */ extern u16 hfsplus_case_fold_table[]; extern u16 hfsplus_decompose_table[]; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ecf70dafb643..b0f9ad362d1d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -20,11 +20,18 @@ static void hfsplus_destroy_inode(struct inode *inode); 
#include "hfsplus_fs.h" -static void hfsplus_read_inode(struct inode *inode) +struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) { struct hfs_find_data fd; struct hfsplus_vh *vhdr; - int err; + struct inode *inode; + long err = -EIO; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); init_MUTEX(&HFSPLUS_I(inode).extents_lock); @@ -41,7 +48,7 @@ static void hfsplus_read_inode(struct inode *inode) hfs_find_exit(&fd); if (err) goto bad_inode; - return; + goto done; } vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; switch(inode->i_ino) { @@ -70,10 +77,13 @@ static void hfsplus_read_inode(struct inode *inode) goto bad_inode; } - return; +done: + unlock_new_inode(inode); + return inode; - bad_inode: - make_bad_inode(inode); +bad_inode: + iget_failed(inode); + return ERR_PTR(err); } static int hfsplus_write_inode(struct inode *inode, int unused) @@ -262,7 +272,6 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations hfsplus_sops = { .alloc_inode = hfsplus_alloc_inode, .destroy_inode = hfsplus_destroy_inode, - .read_inode = hfsplus_read_inode, .write_inode = hfsplus_write_inode, .clear_inode = hfsplus_clear_inode, .put_super = hfsplus_put_super, @@ -278,7 +287,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) struct hfsplus_sb_info *sbi; hfsplus_cat_entry entry; struct hfs_find_data fd; - struct inode *root; + struct inode *root, *inode; struct qstr str; struct nls_table *nls = NULL; int err = -EINVAL; @@ -366,18 +375,25 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) goto cleanup; } - HFSPLUS_SB(sb).alloc_file = iget(sb, HFSPLUS_ALLOC_CNID); - if (!HFSPLUS_SB(sb).alloc_file) { + inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); + if (IS_ERR(inode)) { printk(KERN_ERR "hfs: failed to load allocation file\n"); + err = 
PTR_ERR(inode); goto cleanup; } + HFSPLUS_SB(sb).alloc_file = inode; /* Load the root directory */ - root = iget(sb, HFSPLUS_ROOT_CNID); + root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); + if (IS_ERR(root)) { + printk(KERN_ERR "hfs: failed to load root directory\n"); + err = PTR_ERR(root); + goto cleanup; + } sb->s_root = d_alloc_root(root); if (!sb->s_root) { - printk(KERN_ERR "hfs: failed to load root directory\n"); iput(root); + err = -ENOMEM; goto cleanup; } sb->s_root->d_op = &hfsplus_dentry_operations; @@ -390,9 +406,12 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) goto cleanup; - HFSPLUS_SB(sb).hidden_dir = iget(sb, be32_to_cpu(entry.folder.id)); - if (!HFSPLUS_SB(sb).hidden_dir) + inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto cleanup; + } + HFSPLUS_SB(sb).hidden_dir = inode; } else hfs_find_exit(&fd); diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 9e10f9444b64..628ccf6fa402 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -325,7 +325,7 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) struct super_block *sb = dentry->d_sb; const char *astr; const u16 *dstr; - int casefold, decompose, size, dsize, len; + int casefold, decompose, size, len; unsigned long hash; wchar_t c; u16 c2; @@ -336,6 +336,7 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) astr = str->name; len = str->len; while (len > 0) { + int uninitialized_var(dsize); size = asc2unichar(sb, astr, len, &c); astr += size; len -= size; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 8966b050196e..5222345ddccf 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -11,6 +11,8 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/statfs.h> +#include <linux/seq_file.h> +#include <linux/mount.h> #include "hostfs.h" #include "init.h" 
#include "kern.h" @@ -202,7 +204,7 @@ static char *follow_link(char *link) return ERR_PTR(n); } -static int read_inode(struct inode *ino) +static int hostfs_read_inode(struct inode *ino) { char *name; int err = 0; @@ -233,6 +235,25 @@ static int read_inode(struct inode *ino) return err; } +static struct inode *hostfs_iget(struct super_block *sb) +{ + struct inode *inode; + long ret; + + inode = iget_locked(sb, 0); + if (!inode) + return ERR_PTR(-ENOMEM); + if (inode->i_state & I_NEW) { + ret = hostfs_read_inode(inode); + if (ret < 0) { + iget_failed(inode); + return ERR_PTR(ret); + } + unlock_new_inode(inode); + } + return inode; +} + int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) { /* @@ -303,9 +324,16 @@ static void hostfs_destroy_inode(struct inode *inode) kfree(HOSTFS_I(inode)); } -static void hostfs_read_inode(struct inode *inode) +static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { - read_inode(inode); + struct inode *root = vfs->mnt_sb->s_root->d_inode; + const char *root_path = HOSTFS_I(root)->host_filename; + size_t offset = strlen(root_ino) + 1; + + if (strlen(root_path) > offset) + seq_printf(seq, ",%s", root_path + offset); + + return 0; } static const struct super_operations hostfs_sbops = { @@ -313,8 +341,8 @@ static const struct super_operations hostfs_sbops = { .drop_inode = generic_delete_inode, .delete_inode = hostfs_delete_inode, .destroy_inode = hostfs_destroy_inode, - .read_inode = hostfs_read_inode, .statfs = hostfs_statfs, + .show_options = hostfs_show_options, }; int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) @@ -571,10 +599,11 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, char *name; int error, fd; - error = -ENOMEM; - inode = iget(dir->i_sb, 0); - if (inode == NULL) + inode = hostfs_iget(dir->i_sb); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); goto out; + } error = init_inode(inode, dentry); if (error) @@ -615,10 +644,11 @@ struct dentry 
*hostfs_lookup(struct inode *ino, struct dentry *dentry, char *name; int err; - err = -ENOMEM; - inode = iget(ino->i_sb, 0); - if (inode == NULL) + inode = hostfs_iget(ino->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out; + } err = init_inode(inode, dentry); if (err) @@ -736,11 +766,13 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { struct inode *inode; char *name; - int err = -ENOMEM; + int err; - inode = iget(dir->i_sb, 0); - if (inode == NULL) + inode = hostfs_iget(dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out; + } err = init_inode(inode, dentry); if (err) @@ -952,9 +984,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sprintf(host_root_path, "%s/%s", root_ino, req_root); - root_inode = iget(sb, 0); - if (root_inode == NULL) + root_inode = hostfs_iget(sb); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); goto out_free; + } err = init_inode(root_inode, NULL); if (err) @@ -972,7 +1006,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) if (sb->s_root == NULL) goto out_put; - err = read_inode(root_inode); + err = hostfs_read_inode(root_inode); if (err) { /* No iput in this case because the dput does that for us */ dput(sb->s_root); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 00971d999964..f63a699ec659 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -386,6 +386,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int lowercase, conv, eas, chk, errs, chkdsk, timeshift; int o; struct hpfs_sb_info *sbi = hpfs_sb(s); + char *new_opts = kstrdup(data, GFP_KERNEL); *flags |= MS_NOATIME; @@ -398,15 +399,15 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv, &eas, &chk, &errs, &chkdsk, &timeshift))) { printk("HPFS: bad mount options.\n"); - return 1; + goto out_err; } if (o == 2) { hpfs_help(); - return
1; + goto out_err; } if (timeshift != sbi->sb_timeshift) { printk("HPFS: timeshift can't be changed using remount.\n"); - return 1; + goto out_err; } unmark_dirty(s); @@ -419,7 +420,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s); + kfree(s->s_options); + s->s_options = new_opts; + return 0; + +out_err: + kfree(new_opts); + return -EINVAL; } /* Super operations */ @@ -432,6 +440,7 @@ static const struct super_operations hpfs_sops = .put_super = hpfs_put_super, .statfs = hpfs_statfs, .remount_fs = hpfs_remount_fs, + .show_options = generic_show_options, }; static int hpfs_fill_super(struct super_block *s, void *options, int silent) @@ -454,6 +463,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) int o; + save_mount_options(s, options); + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index affb7412125e..a1e1f0f61aa5 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -155,6 +155,20 @@ static void hppfs_read_inode(struct inode *ino) ino->i_blocks = proc_ino->i_blocks; } +static struct inode *hppfs_iget(struct super_block *sb) +{ + struct inode *inode; + + inode = iget_locked(sb, 0); + if (!inode) + return ERR_PTR(-ENOMEM); + if (inode->i_state & I_NEW) { + hppfs_read_inode(inode); + unlock_new_inode(inode); + } + return inode; +} + static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, struct nameidata *nd) { @@ -190,9 +204,11 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, if(IS_ERR(proc_dentry)) return(proc_dentry); - inode = iget(ino->i_sb, 0); - if(inode == NULL) + inode = hppfs_iget(ino->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out_dput; + } err = init_inode(inode, proc_dentry); if(err) @@ -652,7 +668,6 @@ static void hppfs_destroy_inode(struct inode *inode) static const struct super_operations 
hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, - .read_inode = hppfs_read_inode, .delete_inode = hppfs_delete_inode, .statfs = hppfs_statfs, }; @@ -745,9 +760,11 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) sb->s_magic = HPPFS_SUPER_MAGIC; sb->s_op = &hppfs_sbops; - root_inode = iget(sb, 0); - if(root_inode == NULL) + root_inode = hppfs_iget(sb); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); goto out; + } err = init_inode(root_inode, proc_sb->s_root); if(err) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3b3cc28cdefc..eee9487ae47f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -734,6 +734,7 @@ static const struct super_operations hugetlbfs_ops = { .delete_inode = hugetlbfs_delete_inode, .drop_inode = hugetlbfs_drop_inode, .put_super = hugetlbfs_put_super, + .show_options = generic_show_options, }; static int @@ -817,6 +818,8 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) struct hugetlbfs_config config; struct hugetlbfs_sb_info *sbinfo; + save_mount_options(sb, data); + config.nr_blocks = -1; /* No limit on size by default */ config.nr_inodes = -1; /* No limit on number of inodes by default */ config.uid = current->fsuid; diff --git a/fs/inode.c b/fs/inode.c index 276ffd6b6fdd..53245ffcf93d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -928,8 +928,6 @@ EXPORT_SYMBOL(ilookup); * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * - * This is iget() without the read_inode() portion of get_new_inode(). - * * iget5_locked() uses ifind() to search for the inode specified by @hashval * and @data in the inode cache and if present it is returned with an increased * reference count. 
This is a generalized version of iget_locked() for file @@ -966,8 +964,6 @@ EXPORT_SYMBOL(iget5_locked); * @sb: super block of file system * @ino: inode number to get * - * This is iget() without the read_inode() portion of get_new_inode_fast(). - * * iget_locked() uses ifind_fast() to search for the inode specified by @ino in * the inode cache and if present it is returned with an increased reference * count. This is for file systems where the inode number is sufficient for diff --git a/fs/inotify.c b/fs/inotify.c index 2c5b92152876..690e72595e6e 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -168,20 +168,14 @@ static void set_dentry_child_flags(struct inode *inode, int watched) struct dentry *child; list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { - if (!child->d_inode) { - WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); + if (!child->d_inode) continue; - } + spin_lock(&child->d_lock); - if (watched) { - WARN_ON(child->d_flags & - DCACHE_INOTIFY_PARENT_WATCHED); + if (watched) child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; - } else { - WARN_ON(!(child->d_flags & - DCACHE_INOTIFY_PARENT_WATCHED)); - child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED; - } + else + child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } } @@ -253,7 +247,6 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode) if (!inode) return; - WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); spin_lock(&entry->d_lock); parent = entry->d_parent; if (parent->d_inode && inotify_inode_watched(parent->d_inode)) @@ -627,6 +620,7 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, struct inode *inode, u32 mask) { int ret = 0; + int newly_watched; /* don't allow invalid bits: we don't want flags set */ mask &= IN_ALL_EVENTS | IN_ONESHOT; @@ -653,12 +647,18 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, */ watch->inode = igrab(inode); - if (!inotify_inode_watched(inode)) - 
set_dentry_child_flags(inode, 1); - /* Add the watch to the handle's and the inode's list */ + newly_watched = !inotify_inode_watched(inode); list_add(&watch->h_list, &ih->watches); list_add(&watch->i_list, &inode->inotify_watches); + /* + * Set child flags _after_ adding the watch, so there is no race + * windows where newly instantiated children could miss their parent's + * watched flag. + */ + if (newly_watched) + set_dentry_child_flags(inode, 1); + out: mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 5e009331c01f..7b94a1e3c015 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -41,9 +41,9 @@ static struct kmem_cache *event_cachep __read_mostly; static struct vfsmount *inotify_mnt __read_mostly; /* these are configurable via /proc/sys/fs/inotify/ */ -int inotify_max_user_instances __read_mostly; -int inotify_max_user_watches __read_mostly; -int inotify_max_queued_events __read_mostly; +static int inotify_max_user_instances __read_mostly; +static int inotify_max_user_watches __read_mostly; +static int inotify_max_queued_events __read_mostly; /* * Lock ordering: @@ -79,6 +79,7 @@ struct inotify_device { atomic_t count; /* reference count */ struct user_struct *user; /* user who opened this dev */ struct inotify_handle *ih; /* inotify handle */ + struct fasync_struct *fa; /* async notification */ unsigned int queue_size; /* size of the queue (bytes) */ unsigned int event_count; /* number of pending events */ unsigned int max_events; /* maximum number of events */ @@ -248,6 +249,19 @@ inotify_dev_get_event(struct inotify_device *dev) } /* + * inotify_dev_get_last_event - return the last event in the given dev's queue + * + * Caller must hold dev->ev_mutex. 
+ */ +static inline struct inotify_kernel_event * +inotify_dev_get_last_event(struct inotify_device *dev) +{ + if (list_empty(&dev->events)) + return NULL; + return list_entry(dev->events.prev, struct inotify_kernel_event, list); +} + +/* * inotify_dev_queue_event - event handler registered with core inotify, adds * a new event to the given device * @@ -269,11 +283,11 @@ static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, /* we can safely put the watch as we don't reference it while * generating the event */ - if (mask & IN_IGNORED || mask & IN_ONESHOT) + if (mask & IN_IGNORED || w->mask & IN_ONESHOT) put_inotify_watch(w); /* final put */ /* coalescing: drop this event if it is a dupe of the previous */ - last = inotify_dev_get_event(dev); + last = inotify_dev_get_last_event(dev); if (last && last->event.mask == mask && last->event.wd == wd && last->event.cookie == cookie) { const char *lastname = last->name; @@ -302,6 +316,7 @@ static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; list_add_tail(&kevent->list, &dev->events); wake_up_interruptible(&dev->wq); + kill_fasync(&dev->fa, SIGIO, POLL_IN); out: mutex_unlock(&dev->ev_mutex); @@ -352,7 +367,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd, /* you can only watch an inode if you have read permissions on it */ error = vfs_permission(nd, MAY_READ); if (error) - path_release(nd); + path_put(&nd->path); return error; } @@ -490,6 +505,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, return ret; } +static int inotify_fasync(int fd, struct file *file, int on) +{ + struct inotify_device *dev = file->private_data; + + return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 
0 : -EIO; +} + static int inotify_release(struct inode *ignored, struct file *file) { struct inotify_device *dev = file->private_data; @@ -502,6 +524,9 @@ static int inotify_release(struct inode *ignored, struct file *file) inotify_dev_event_dequeue(dev); mutex_unlock(&dev->ev_mutex); + if (file->f_flags & FASYNC) + inotify_fasync(-1, file, 0); + /* free this device: the put matching the get in inotify_init() */ put_inotify_dev(dev); @@ -530,6 +555,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, static const struct file_operations inotify_fops = { .poll = inotify_poll, .read = inotify_read, + .fasync = inotify_fasync, .release = inotify_release, .unlocked_ioctl = inotify_ioctl, .compat_ioctl = inotify_ioctl, @@ -577,6 +603,7 @@ asmlinkage long sys_inotify_init(void) goto out_free_dev; } dev->ih = ih; + dev->fa = NULL; filp->f_op = &inotify_fops; filp->f_path.mnt = mntget(inotify_mnt); @@ -640,7 +667,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) goto fput_and_out; /* inode held in place by reference to nd; dev by fget on fd */ - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; dev = filp->private_data; mutex_lock(&dev->up_mutex); @@ -649,7 +676,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) ret = create_watch(dev, inode, mask); mutex_unlock(&dev->up_mutex); - path_release(&nd); + path_put(&nd.path); fput_and_out: fput_light(filp, fput_needed); return ret; diff --git a/fs/ioctl.c b/fs/ioctl.c index c2a773e8620b..f32fbde2175e 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -12,12 +12,24 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/module.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/ioctls.h> -static long do_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +/** + * vfs_ioctl - call filesystem specific ioctl methods + * @filp: open file to invoke ioctl method on + * @cmd: ioctl command to execute + 
* @arg: command-specific argument for ioctl + * + * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise + * invokes filesystem specific ->ioctl method. If neither method exists, + * returns -ENOTTY. + * + * Returns 0 on success, -errno on error. + */ +long vfs_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = -ENOTTY; @@ -40,123 +52,148 @@ static long do_ioctl(struct file *filp, unsigned int cmd, return error; } +static int ioctl_fibmap(struct file *filp, int __user *p) +{ + struct address_space *mapping = filp->f_mapping; + int res, block; + + /* do we support this mess? */ + if (!mapping->a_ops->bmap) + return -EINVAL; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + res = get_user(block, p); + if (res) + return res; + lock_kernel(); + res = mapping->a_ops->bmap(mapping, block); + unlock_kernel(); + return put_user(res, p); +} + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - int error; - int block; - struct inode * inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; int __user *p = (int __user *)arg; switch (cmd) { - case FIBMAP: - { - struct address_space *mapping = filp->f_mapping; - int res; - /* do we support this mess? */ - if (!mapping->a_ops->bmap) - return -EINVAL; - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - if ((error = get_user(block, p)) != 0) - return error; + case FIBMAP: + return ioctl_fibmap(filp, p); + case FIGETBSZ: + return put_user(inode->i_sb->s_blocksize, p); + case FIONREAD: + return put_user(i_size_read(inode) - filp->f_pos, p); + } + return vfs_ioctl(filp, cmd, arg); +} + +static int ioctl_fionbio(struct file *filp, int __user *argp) +{ + unsigned int flag; + int on, error; + + error = get_user(on, argp); + if (error) + return error; + flag = O_NONBLOCK; +#ifdef __sparc__ + /* SunOS compatibility item. 
*/ + if (O_NONBLOCK != O_NDELAY) + flag |= O_NDELAY; +#endif + if (on) + filp->f_flags |= flag; + else + filp->f_flags &= ~flag; + return error; +} + +static int ioctl_fioasync(unsigned int fd, struct file *filp, + int __user *argp) +{ + unsigned int flag; + int on, error; + + error = get_user(on, argp); + if (error) + return error; + flag = on ? FASYNC : 0; + + /* Did FASYNC state change ? */ + if ((flag ^ filp->f_flags) & FASYNC) { + if (filp->f_op && filp->f_op->fasync) { lock_kernel(); - res = mapping->a_ops->bmap(mapping, block); + error = filp->f_op->fasync(fd, filp, on); unlock_kernel(); - return put_user(res, p); - } - case FIGETBSZ: - return put_user(inode->i_sb->s_blocksize, p); - case FIONREAD: - return put_user(i_size_read(inode) - filp->f_pos, p); + } else + error = -ENOTTY; } + if (error) + return error; - return do_ioctl(filp, cmd, arg); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + return error; } /* * When you add any new common ioctls to the switches above and below * please update compat_sys_ioctl() too. * - * vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. + * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. * It's just a simple helper for sys_ioctl and compat_sys_ioctl. */ -int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) +int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, + unsigned long arg) { - unsigned int flag; - int on, error = 0; + int error = 0; + int __user *argp = (int __user *)arg; switch (cmd) { - case FIOCLEX: - set_close_on_exec(fd, 1); - break; - - case FIONCLEX: - set_close_on_exec(fd, 0); - break; - - case FIONBIO: - if ((error = get_user(on, (int __user *)arg)) != 0) - break; - flag = O_NONBLOCK; -#ifdef __sparc__ - /* SunOS compatibility item. 
*/ - if(O_NONBLOCK != O_NDELAY) - flag |= O_NDELAY; -#endif - if (on) - filp->f_flags |= flag; - else - filp->f_flags &= ~flag; - break; - - case FIOASYNC: - if ((error = get_user(on, (int __user *)arg)) != 0) - break; - flag = on ? FASYNC : 0; - - /* Did FASYNC state change ? */ - if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - lock_kernel(); - error = filp->f_op->fasync(fd, filp, on); - unlock_kernel(); - } - else error = -ENOTTY; - } - if (error != 0) - break; - - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - break; - - case FIOQSIZE: - if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || - S_ISREG(filp->f_path.dentry->d_inode->i_mode) || - S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { - loff_t res = inode_get_bytes(filp->f_path.dentry->d_inode); - error = copy_to_user((loff_t __user *)arg, &res, sizeof(res)) ? -EFAULT : 0; - } - else - error = -ENOTTY; - break; - default: - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) - error = file_ioctl(filp, cmd, arg); - else - error = do_ioctl(filp, cmd, arg); - break; + case FIOCLEX: + set_close_on_exec(fd, 1); + break; + + case FIONCLEX: + set_close_on_exec(fd, 0); + break; + + case FIONBIO: + error = ioctl_fionbio(filp, argp); + break; + + case FIOASYNC: + error = ioctl_fioasync(fd, filp, argp); + break; + + case FIOQSIZE: + if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || + S_ISREG(filp->f_path.dentry->d_inode->i_mode) || + S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { + loff_t res = + inode_get_bytes(filp->f_path.dentry->d_inode); + error = copy_to_user((loff_t __user *)arg, &res, + sizeof(res)) ? 
-EFAULT : 0; + } else + error = -ENOTTY; + break; + default: + if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + error = file_ioctl(filp, cmd, arg); + else + error = vfs_ioctl(filp, cmd, arg); + break; } return error; } asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct file * filp; + struct file *filp; int error = -EBADF; int fput_needed; @@ -168,7 +205,7 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) if (error) goto out_fput; - error = vfs_ioctl(filp, fd, cmd, arg); + error = do_vfs_ioctl(filp, fd, cmd, arg); out_fput: fput_light(filp, fput_needed); out: diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 29f9753ae5e5..bb219138331a 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -26,11 +26,9 @@ isofs_export_iget(struct super_block *sb, if (block == 0) return ERR_PTR(-ESTALE); inode = isofs_iget(sb, block, offset); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) - || (generation && inode->i_generation != generation)) - { + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -110,8 +108,10 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) parent_inode = isofs_iget(child_inode->i_sb, parent_block, parent_offset); - if (parent_inode == NULL) { - rv = ERR_PTR(-EACCES); + if (IS_ERR(parent_inode)) { + rv = ERR_CAST(parent_inode); + if (rv != ERR_PTR(-ENOMEM)) + rv = ERR_PTR(-EACCES); goto out; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 09e3d306e96f..044a254d526b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -54,7 +54,7 @@ static void isofs_put_super(struct super_block *sb) return; } -static void isofs_read_inode(struct inode *); +static int isofs_read_inode(struct inode *); static int isofs_statfs (struct dentry *, struct kstatfs *); static struct kmem_cache *isofs_inode_cachep; @@ -107,10 +107,10 @@ static int 
isofs_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations isofs_sops = { .alloc_inode = isofs_alloc_inode, .destroy_inode = isofs_destroy_inode, - .read_inode = isofs_read_inode, .put_super = isofs_put_super, .statfs = isofs_statfs, .remount_fs = isofs_remount, + .show_options = generic_show_options, }; @@ -145,7 +145,8 @@ struct iso9660_options{ char nocompress; unsigned char check; unsigned int blocksize; - mode_t mode; + mode_t fmode; + mode_t dmode; gid_t gid; uid_t uid; char *iocharset; @@ -306,7 +307,7 @@ enum { Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, - Opt_nocompress, Opt_hide, Opt_showassoc, + Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, }; static match_table_t tokens = { @@ -333,6 +334,7 @@ static match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%u"}, + {Opt_dmode, "dmode=%u"}, {Opt_block, "block=%u"}, {Opt_ignore, "conv=binary"}, {Opt_ignore, "conv=b"}, @@ -360,7 +362,7 @@ static int parse_options(char *options, struct iso9660_options *popt) popt->check = 'u'; /* unset */ popt->nocompress = 0; popt->blocksize = 1024; - popt->mode = S_IRUGO | S_IXUGO; /* + popt->fmode = popt->dmode = S_IRUGO | S_IXUGO; /* * r-x for all. 
The disc could * be shared with DOS machines so * virtually anything could be @@ -452,7 +454,12 @@ static int parse_options(char *options, struct iso9660_options *popt) case Opt_mode: if (match_int(&args[0], &option)) return 0; - popt->mode = option; + popt->fmode = option; + break; + case Opt_dmode: + if (match_int(&args[0], &option)) + return 0; + popt->dmode = option; break; case Opt_block: if (match_int(&args[0], &option)) @@ -552,9 +559,11 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) int joliet_level = 0; int iso_blknum, block; int orig_zonesize; - int table; + int table, error = -EINVAL; unsigned int vol_desc_start; + save_mount_options(s, data); + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; @@ -802,7 +811,8 @@ root_found: * on the disk as suid, so we merely allow them to set the default * permissions. */ - sbi->s_mode = opt.mode & 0777; + sbi->s_fmode = opt.fmode & 0777; + sbi->s_dmode = opt.dmode & 0777; /* * Read the root inode, which _may_ result in changing @@ -810,6 +820,8 @@ root_found: * we then decide whether to use the Joliet descriptor. 
*/ inode = isofs_iget(s, sbi->s_firstdatazone, 0); + if (IS_ERR(inode)) + goto out_no_root; /* * If this disk has both Rock Ridge and Joliet on it, then we @@ -829,6 +841,8 @@ root_found: "ISOFS: changing to secondary root\n"); iput(inode); inode = isofs_iget(s, sbi->s_firstdatazone, 0); + if (IS_ERR(inode)) + goto out_no_root; } } @@ -842,8 +856,6 @@ root_found: sbi->s_joliet_level = joliet_level; /* check the root inode */ - if (!inode) - goto out_no_root; if (!inode->i_op) goto out_bad_root; @@ -876,11 +888,14 @@ root_found: */ out_bad_root: printk(KERN_WARNING "%s: root inode not initialized\n", __func__); - goto out_iput; -out_no_root: - printk(KERN_WARNING "%s: get root inode failed\n", __func__); out_iput: iput(inode); + goto out_no_inode; +out_no_root: + error = PTR_ERR(inode); + if (error != -ENOMEM) + printk(KERN_WARNING "%s: get root inode failed\n", __func__); +out_no_inode: #ifdef CONFIG_JOLIET if (sbi->s_nls_iocharset) unload_nls(sbi->s_nls_iocharset); @@ -908,7 +923,7 @@ out_freesbi: kfree(opt.iocharset); kfree(sbi); s->s_fs_info = NULL; - return -EINVAL; + return error; } static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) @@ -930,7 +945,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) /* * Get a set of blocks; filling in buffer_heads if already allocated * or getblk() if they are not. Returns the number of blocks inserted - * (0 == error.) + * (-ve == error.) 
*/ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, struct buffer_head **bh, unsigned long nblocks) @@ -940,11 +955,12 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, unsigned int firstext; unsigned long nextblk, nextoff; long iblock = (long)iblock_s; - int section, rv; + int section, rv, error; struct iso_inode_info *ei = ISOFS_I(inode); lock_kernel(); + error = -EIO; rv = 0; if (iblock < 0 || iblock != iblock_s) { printk(KERN_DEBUG "%s: block number too large\n", __func__); @@ -983,8 +999,10 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, offset += sect_size; ninode = isofs_iget(inode->i_sb, nextblk, nextoff); - if (!ninode) + if (IS_ERR(ninode)) { + error = PTR_ERR(ninode); goto abort; + } firstext = ISOFS_I(ninode)->i_first_extent; sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); nextblk = ISOFS_I(ninode)->i_next_section_block; @@ -1015,9 +1033,10 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, rv++; } + error = 0; abort: unlock_kernel(); - return rv; + return rv != 0 ? rv : error; } /* @@ -1026,12 +1045,15 @@ abort: static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + int ret; + if (create) { printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__); return -EROFS; } - return isofs_get_blocks(inode, iblock, &bh_result, 1) ? 0 : -EIO; + ret = isofs_get_blocks(inode, iblock, &bh_result, 1); + return ret < 0 ? 
ret : 0; } static int isofs_bmap(struct inode *inode, sector_t block) @@ -1186,7 +1208,7 @@ out_toomany: goto out; } -static void isofs_read_inode(struct inode *inode) +static int isofs_read_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1199,6 +1221,7 @@ static void isofs_read_inode(struct inode *inode) unsigned int de_len; unsigned long offset; struct iso_inode_info *ei = ISOFS_I(inode); + int ret = -EIO; block = ei->i_iget5_block; bh = sb_bread(inode->i_sb, block); @@ -1216,6 +1239,7 @@ static void isofs_read_inode(struct inode *inode) tmpde = kmalloc(de_len, GFP_KERNEL); if (tmpde == NULL) { printk(KERN_INFO "%s: out of memory\n", __func__); + ret = -ENOMEM; goto fail; } memcpy(tmpde, bh->b_data + offset, frag1); @@ -1235,7 +1259,7 @@ static void isofs_read_inode(struct inode *inode) ei->i_file_format = isofs_file_normal; if (de->flags[-high_sierra] & 2) { - inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; + inode->i_mode = sbi->s_dmode | S_IFDIR; inode->i_nlink = 1; /* * Set to 1. We know there are 2, but * the find utility tries to optimize @@ -1245,9 +1269,8 @@ static void isofs_read_inode(struct inode *inode) */ } else { /* Everybody gets to read the file. */ - inode->i_mode = sbi->s_mode; + inode->i_mode = sbi->s_fmode | S_IFREG; inode->i_nlink = 1; - inode->i_mode |= S_IFREG; } inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; @@ -1259,8 +1282,10 @@ static void isofs_read_inode(struct inode *inode) ei->i_section_size = isonum_733(de->size); if (de->flags[-high_sierra] & 0x80) { - if(isofs_read_level3_size(inode)) + ret = isofs_read_level3_size(inode); + if (ret < 0) goto fail; + ret = -EIO; } else { ei->i_next_section_block = 0; ei->i_next_section_offset = 0; @@ -1346,16 +1371,16 @@ static void isofs_read_inode(struct inode *inode) /* XXX - parse_rock_ridge_inode() had already set i_rdev. 
*/ init_special_inode(inode, inode->i_mode, inode->i_rdev); + ret = 0; out: kfree(tmpde); if (bh) brelse(bh); - return; + return ret; out_badread: printk(KERN_WARNING "ISOFS: unable to read i-node block\n"); fail: - make_bad_inode(inode); goto out; } @@ -1394,9 +1419,10 @@ struct inode *isofs_iget(struct super_block *sb, unsigned long hashval; struct inode *inode; struct isofs_iget5_callback_data data; + long ret; if (offset >= 1ul << sb->s_blocksize_bits) - return NULL; + return ERR_PTR(-EINVAL); data.block = block; data.offset = offset; @@ -1406,9 +1432,17 @@ struct inode *isofs_iget(struct super_block *sb, inode = iget5_locked(sb, hashval, &isofs_iget5_test, &isofs_iget5_set, &data); - if (inode && (inode->i_state & I_NEW)) { - sb->s_op->read_inode(inode); - unlock_new_inode(inode); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) { + ret = isofs_read_inode(inode); + if (ret < 0) { + iget_failed(inode); + inode = ERR_PTR(ret); + } else { + unlock_new_inode(inode); + } } return inode; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index f3213f9f89af..d1bdf8adb351 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -51,7 +51,8 @@ struct isofs_sb_info { unsigned char s_hide; unsigned char s_showassoc; - mode_t s_mode; + mode_t s_fmode; + mode_t s_dmode; gid_t s_gid; uid_t s_uid; struct nls_table *s_nls_iocharset; /* Native language support table */ diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index e2b4dad39ca9..344b247bc29a 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -179,9 +179,9 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam inode = NULL; if (found) { inode = isofs_iget(dir->i_sb, block, offset); - if (!inode) { + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index f3a1db3098de..6bd48f0a7047 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -474,8 +474,10 @@ repeat: 
isofs_iget(inode->i_sb, ISOFS_I(inode)->i_first_extent, 0); - if (!reloc) + if (IS_ERR(reloc)) { + ret = PTR_ERR(reloc); goto out; + } inode->i_mode = reloc->i_mode; inode->i_nlink = reloc->i_nlink; inode->i_uid = reloc->i_uid; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 31853eb65b4c..a38c7186c570 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -104,7 +104,8 @@ static int journal_write_commit_record(journal_t *journal, { struct journal_head *descriptor; struct buffer_head *bh; - int i, ret; + journal_header_t *header; + int ret; int barrier_done = 0; if (is_journal_aborted(journal)) @@ -116,13 +117,10 @@ static int journal_write_commit_record(journal_t *journal, bh = jh2bh(descriptor); - /* AKPM: buglet - add `i' to tmp! */ - for (i = 0; i < bh->b_size; i += 512) { - journal_header_t *tmp = (journal_header_t*)bh->b_data; - tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); - } + header = (journal_header_t *)(bh->b_data); + header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); + header->h_sequence = cpu_to_be32(commit_transaction->t_tid); JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); @@ -131,6 +129,8 @@ static int journal_write_commit_record(journal_t *journal, barrier_done = 1; } ret = sync_dirty_buffer(bh); + if (barrier_done) + clear_buffer_ordered(bh); /* is it possible for another commit to fail at roughly * the same time as this one? 
If so, we don't want to * trust the barrier flag in the super, but instead want @@ -148,7 +148,6 @@ static int journal_write_commit_record(journal_t *journal, spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ - clear_buffer_ordered(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); ret = sync_dirty_buffer(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 5d14243499d4..3943a8905eb2 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1457,7 +1457,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, * and don't attempt to make any other journal updates. */ -void __journal_abort_hard(journal_t *journal) +static void __journal_abort_hard(journal_t *journal) { transaction_t *transaction; char b[BDEVNAME_SIZE]; diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index c5d9694b6a2f..2b8edf4d6eaa 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -354,7 +354,7 @@ static int do_one_pass(journal_t *journal, struct buffer_head * obh; struct buffer_head * nbh; - cond_resched(); /* We're under lock_kernel() */ + cond_resched(); /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4f302d279279..a8173081f831 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -136,18 +136,20 @@ static int journal_submit_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "submit commit block"); lock_buffer(bh); - + get_bh(bh); set_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; if (journal->j_flags & JBD2_BARRIER && - !JBD2_HAS_COMPAT_FEATURE(journal, + !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { set_buffer_ordered(bh); barrier_done = 1; } ret = submit_bh(WRITE, bh); + if (barrier_done) + clear_buffer_ordered(bh); /* is it possible for another commit to fail at 
roughly * the same time as this one? If so, we don't want to @@ -166,7 +168,6 @@ static int journal_submit_commit_record(journal_t *journal, spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ - clear_buffer_ordered(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); ret = submit_bh(WRITE, bh); @@ -872,7 +873,8 @@ wait_for_iobuf: if (err) __jbd2_journal_abort_hard(journal); } - err = journal_wait_on_commit_record(cbh); + if (!err && !is_journal_aborted(journal)) + err = journal_wait_on_commit_record(cbh); if (err) jbd2_journal_abort(journal, err); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 921680663fa2..146411387ada 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -397,7 +397,7 @@ static int do_one_pass(journal_t *journal, struct buffer_head * obh; struct buffer_head * nbh; - cond_resched(); /* We're under lock_kernel() */ + cond_resched(); /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of @@ -641,7 +641,7 @@ static int do_one_pass(journal_t *journal, if (chksum_err) { info->end_transaction = next_commit_ID; - if (!JBD2_HAS_COMPAT_FEATURE(journal, + if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ printk(KERN_ERR "JBD: Transaction %u " diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 77fc5838609c..4c80404a9aba 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -176,7 +176,7 @@ static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct spin_unlock(&inode->i_lock); } -struct posix_acl *jffs2_get_acl(struct inode *inode, int type) +static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) { struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct posix_acl *acl; @@ -345,8 +345,10 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) if (!clone) return -ENOMEM; rc = posix_acl_create_masq(clone, (mode_t *)i_mode); - if (rc < 0) + if (rc < 0) { + 
posix_acl_release(clone); return rc; + } if (rc > 0) jffs2_iset_acl(inode, &f->i_acl_access, clone); diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 76c6ebd1acd9..0bb7f003fd80 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,6 @@ struct jffs2_acl_header { #define JFFS2_ACL_NOT_CACHED ((void *)-1) -extern struct posix_acl *jffs2_get_acl(struct inode *inode, int type); extern int jffs2_permission(struct inode *, int, struct nameidata *); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); @@ -40,7 +39,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; #else -#define jffs2_get_acl(inode, type) (NULL) #define jffs2_permission (NULL) #define jffs2_acl_chmod(inode) (0) #define jffs2_init_acl_pre(dir_i,inode,mode) (0) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 787e392ffd41..f948f7e6ec82 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -101,10 +101,10 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, ino = fd->ino; up(&dir_f->sem); if (ino) { - inode = iget(dir_i->i_sb, ino); - if (!inode) { + inode = jffs2_iget(dir_i->i_sb, ino); + if (IS_ERR(inode)) { printk(KERN_WARNING "iget() failed for ino #%u\n", ino); - return (ERR_PTR(-EIO)); + return ERR_CAST(inode); } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index d2e06f7ea96f..e26ea78c7892 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -97,11 +97,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) ri->gid = cpu_to_je16((ivalid & ATTR_GID)?iattr->ia_gid:inode->i_gid); if (ivalid & ATTR_MODE) - if (iattr->ia_mode & S_ISGID && - !in_group_p(je16_to_cpu(ri->gid)) && !capable(CAP_FSETID)) - ri->mode = cpu_to_jemode(iattr->ia_mode & ~S_ISGID); - else - ri->mode = cpu_to_jemode(iattr->ia_mode); + ri->mode = cpu_to_jemode(iattr->ia_mode); else ri->mode = cpu_to_jemode(inode->i_mode); @@ -230,16 +226,23 @@ void jffs2_clear_inode (struct inode *inode) jffs2_do_clear_inode(c, f); } 
-void jffs2_read_inode (struct inode *inode) +struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) { struct jffs2_inode_info *f; struct jffs2_sb_info *c; struct jffs2_raw_inode latest_node; union jffs2_device_node jdev; + struct inode *inode; dev_t rdev = 0; int ret; - D1(printk(KERN_DEBUG "jffs2_read_inode(): inode->i_ino == %lu\n", inode->i_ino)); + D1(printk(KERN_DEBUG "jffs2_iget(): ino == %lu\n", ino)); + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; f = JFFS2_INODE_INFO(inode); c = JFFS2_SB_INFO(inode->i_sb); @@ -250,9 +253,9 @@ void jffs2_read_inode (struct inode *inode) ret = jffs2_do_read_inode(c, f, inode->i_ino, &latest_node); if (ret) { - make_bad_inode(inode); up(&f->sem); - return; + iget_failed(inode); + return ERR_PTR(ret); } inode->i_mode = jemode_to_cpu(latest_node.mode); inode->i_uid = je16_to_cpu(latest_node.uid); @@ -303,19 +306,14 @@ void jffs2_read_inode (struct inode *inode) if (f->metadata->size != sizeof(jdev.old) && f->metadata->size != sizeof(jdev.new)) { printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size); - up(&f->sem); - jffs2_do_clear_inode(c, f); - make_bad_inode(inode); - return; + goto error_io; } D1(printk(KERN_DEBUG "Reading device numbers from flash\n")); - if (jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size) < 0) { + ret = jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size); + if (ret < 0) { /* Eep */ printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); - up(&f->sem); - jffs2_do_clear_inode(c, f); - make_bad_inode(inode); - return; + goto error; } if (f->metadata->size == sizeof(jdev.old)) rdev = old_decode_dev(je16_to_cpu(jdev.old)); @@ -335,6 +333,16 @@ void jffs2_read_inode (struct inode *inode) up(&f->sem); D1(printk(KERN_DEBUG "jffs2_read_inode() returning\n")); + unlock_new_inode(inode); + return inode; + +error_io: + ret = 
-EIO; +error: + up(&f->sem); + jffs2_do_clear_inode(c, f); + iget_failed(inode); + return ERR_PTR(ret); } void jffs2_dirty_inode(struct inode *inode) @@ -522,15 +530,16 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) if ((ret = jffs2_do_mount_fs(c))) goto out_inohash; - ret = -EINVAL; - D1(printk(KERN_DEBUG "jffs2_do_fill_super(): Getting root inode\n")); - root_i = iget(sb, 1); - if (is_bad_inode(root_i)) { + root_i = jffs2_iget(sb, 1); + if (IS_ERR(root_i)) { D1(printk(KERN_WARNING "get root inode failed\n")); - goto out_root_i; + ret = PTR_ERR(root_i); + goto out_root; } + ret = -ENOMEM; + D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n")); sb->s_root = d_alloc_root(root_i); if (!sb->s_root) @@ -546,6 +555,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) out_root_i: iput(root_i); +out_root: jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); if (jffs2_blocks_use_vmalloc(c)) @@ -615,9 +625,9 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, jffs2_do_unlink() would need the alloc_sem and we have it. Just iget() it, and if read_inode() is necessary that's OK. */ - inode = iget(OFNI_BS_2SFFJ(c), inum); - if (!inode) - return ERR_PTR(-ENOMEM); + inode = jffs2_iget(OFNI_BS_2SFFJ(c), inum); + if (IS_ERR(inode)) + return ERR_CAST(inode); } if (is_bad_inode(inode)) { printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 4bf86088b3ae..87c6f555e1a0 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -32,15 +32,18 @@ void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new if ((*prev)->nhash == new->nhash && !strcmp((*prev)->name, new->name)) { /* Duplicate. Free one */ if (new->version < (*prev)->version) { - dbg_dentlist("Eep! Marking new dirent node is obsolete, old is \"%s\", ino #%u\n", + dbg_dentlist("Eep! 
Marking new dirent node obsolete, old is \"%s\", ino #%u\n", (*prev)->name, (*prev)->ino); jffs2_mark_node_obsolete(c, new->raw); jffs2_free_full_dirent(new); } else { - dbg_dentlist("marking old dirent \"%s\", ino #%u bsolete\n", + dbg_dentlist("marking old dirent \"%s\", ino #%u obsolete\n", (*prev)->name, (*prev)->ino); new->next = (*prev)->next; - jffs2_mark_node_obsolete(c, ((*prev)->raw)); + /* It may have been a 'placeholder' deletion dirent, + if jffs2_can_mark_obsolete() (see jffs2_do_unlink()) */ + if ((*prev)->raw) + jffs2_mark_node_obsolete(c, ((*prev)->raw)); jffs2_free_full_dirent(*prev); *prev = new; } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index bf64686cf098..1b10d2594092 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -175,7 +175,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations; /* fs.c */ int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); -void jffs2_read_inode (struct inode *); +struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_clear_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 6c1ba3566f58..e512a93d6249 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -37,23 +37,24 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info BUG_ON(tn->csize == 0); - if (!jffs2_is_writebuffered(c)) - goto adj_acc; - /* Calculate how many bytes were already checked */ ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode); - len = ofs % c->wbuf_pagesize; - if (likely(len)) - len = c->wbuf_pagesize - len; - - if (len >= tn->csize) { - dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n", - ref_offset(ref), tn->csize, ofs); - goto adj_acc; - } + len = tn->csize; + + if 
(jffs2_is_writebuffered(c)) { + int adj = ofs % c->wbuf_pagesize; + if (likely(adj)) + adj = c->wbuf_pagesize - adj; + + if (adj >= tn->csize) { + dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n", + ref_offset(ref), tn->csize, ofs); + goto adj_acc; + } - ofs += len; - len = tn->csize - len; + ofs += adj; + len -= adj; + } dbg_readinode("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n", ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len); @@ -63,7 +64,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info * adding and jffs2_flash_read_end() interface. */ if (c->mtd->point) { err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); - if (!err && retlen < tn->csize) { + if (!err && retlen < len) { JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); c->mtd->unpoint(c->mtd, buffer, ofs, retlen); } else if (err) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index ffa447511e6a..4677355996cc 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -65,7 +65,6 @@ static const struct super_operations jffs2_super_operations = { .alloc_inode = jffs2_alloc_inode, .destroy_inode =jffs2_destroy_inode, - .read_inode = jffs2_read_inode, .put_super = jffs2_put_super, .write_super = jffs2_write_super, .statfs = jffs2_statfs, diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index 147e2cbee9e4..776f13cbf2b5 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -177,7 +177,7 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 void *hold_err = fn->raw; /* Release the full_dnode which is now useless, and return */ jffs2_free_full_dnode(fn); - return ERR_PTR(PTR_ERR(hold_err)); + return ERR_CAST(hold_err); } fn->ofs = je32_to_cpu(ri->offset); fn->size = je32_to_cpu(ri->dsize); @@ -313,7 +313,7 @@ struct 
jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff void *hold_err = fd->raw; /* Release the full_dirent which is now useless, and return */ jffs2_free_full_dirent(fd); - return ERR_PTR(PTR_ERR(hold_err)); + return ERR_CAST(hold_err); } if (retried) { @@ -582,7 +582,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, jffs2_add_fd_to_list(c, fd, &dir_f->dents); up(&dir_f->sem); } else { - struct jffs2_full_dirent **prev = &dir_f->dents; + struct jffs2_full_dirent *fd = dir_f->dents; uint32_t nhash = full_name_hash(name, namelen); /* We don't actually want to reserve any space, but we do @@ -590,21 +590,22 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, down(&c->alloc_sem); down(&dir_f->sem); - while ((*prev) && (*prev)->nhash <= nhash) { - if ((*prev)->nhash == nhash && - !memcmp((*prev)->name, name, namelen) && - !(*prev)->name[namelen]) { - struct jffs2_full_dirent *this = *prev; + for (fd = dir_f->dents; fd; fd = fd->next) { + if (fd->nhash == nhash && + !memcmp(fd->name, name, namelen) && + !fd->name[namelen]) { D1(printk(KERN_DEBUG "Marking old dirent node (ino #%u) @%08x obsolete\n", - this->ino, ref_offset(this->raw))); - - *prev = this->next; - jffs2_mark_node_obsolete(c, (this->raw)); - jffs2_free_full_dirent(this); + fd->ino, ref_offset(fd->raw))); + jffs2_mark_node_obsolete(c, fd->raw); + /* We don't want to remove it from the list immediately, + because that screws up getdents()/seek() semantics even + more than they're screwed already. 
Turn it into a + node-less deletion dirent instead -- a placeholder */ + fd->raw = NULL; + fd->ino = 0; break; } - prev = &((*prev)->next); } up(&dir_f->sem); } @@ -630,7 +631,8 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, D1(printk(KERN_DEBUG "Removing deletion dirent for \"%s\" from dir ino #%u\n", fd->name, dead_f->inocache->ino)); } - jffs2_mark_node_obsolete(c, fd->raw); + if (fd->raw) + jffs2_mark_node_obsolete(c, fd->raw); jffs2_free_full_dirent(fd); } } diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 87eb93694af7..7f6063acaa3b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -112,5 +112,8 @@ const struct file_operations jfs_file_operations = { .splice_write = generic_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, - .ioctl = jfs_ioctl, + .unlocked_ioctl = jfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = jfs_compat_ioctl, +#endif }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 4672013802e1..210339784b56 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -31,11 +31,21 @@ #include "jfs_debug.h" -void jfs_read_inode(struct inode *inode) +struct inode *jfs_iget(struct super_block *sb, unsigned long ino) { - if (diRead(inode)) { - make_bad_inode(inode); - return; + struct inode *inode; + int ret; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ret = diRead(inode); + if (ret < 0) { + iget_failed(inode); + return ERR_PTR(ret); } if (S_ISREG(inode->i_mode)) { @@ -55,6 +65,8 @@ void jfs_read_inode(struct inode *inode) inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } + unlock_new_inode(inode); + return inode; } /* diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index dfda12a073e1..a1f8e375ad21 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -51,9 +51,9 @@ static long jfs_map_ext2(unsigned long flags, int from) } -int jfs_ioctl(struct inode * inode, struct file * filp, unsigned 
int cmd, - unsigned long arg) +long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct jfs_inode_info *jfs_inode = JFS_IP(inode); unsigned int flags; @@ -82,6 +82,10 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; + + /* Lock against other parallel changes of flags */ + mutex_lock(&inode->i_mutex); + jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; @@ -92,8 +96,10 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, if ((oldflags & JFS_IMMUTABLE_FL) || ((flags ^ oldflags) & (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { - if (!capable(CAP_LINUX_IMMUTABLE)) + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); return -EPERM; + } } flags = flags & JFS_FL_USER_MODIFIABLE; @@ -101,6 +107,7 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, jfs_inode->mode2 = flags; jfs_set_inode_flags(inode); + mutex_unlock(&inode->i_mutex); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); return 0; @@ -110,3 +117,21 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, } } +#ifdef CONFIG_COMPAT +long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + /* While these ioctl numbers defined with 'long' and have different + * numbers than the 64bit ABI, + * the actual implementation only deals with ints and is compatible. 
+ */ + switch (cmd) { + case JFS_IOC_GETFLAGS32: + cmd = JFS_IOC_GETFLAGS; + break; + case JFS_IOC_SETFLAGS32: + cmd = JFS_IOC_SETFLAGS; + break; + } + return jfs_ioctl(filp, cmd, arg); +} +#endif diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index c387540d3425..395c4c0d0f06 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -170,5 +170,7 @@ struct dinode { #define JFS_IOC_GETFLAGS _IOR('f', 1, long) #define JFS_IOC_SETFLAGS _IOW('f', 2, long) +#define JFS_IOC_GETFLAGS32 _IOR('f', 1, int) +#define JFS_IOC_SETFLAGS32 _IOW('f', 2, int) #endif /*_H_JFS_DINODE */ diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 8e2cf2cde185..adb2fafcc544 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -22,9 +22,9 @@ struct fid; extern struct inode *ialloc(struct inode *, umode_t); extern int jfs_fsync(struct file *, struct dentry *, int); -extern int jfs_ioctl(struct inode *, struct file *, - unsigned int, unsigned long); -extern void jfs_read_inode(struct inode *); +extern long jfs_ioctl(struct file *, unsigned int, unsigned long); +extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); +extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode*, int); extern void jfs_delete_inode(struct inode *); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index f8718de3505e..0ba6778edaa2 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1462,12 +1462,10 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc } } - ip = iget(dip->i_sb, inum); - if (ip == NULL || is_bad_inode(ip)) { + ip = jfs_iget(dip->i_sb, inum); + if (IS_ERR(ip)) { jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum); - if (ip) - iput(ip); - return ERR_PTR(-EACCES); + return ERR_CAST(ip); } dentry = d_splice_alias(ip, dentry); @@ -1485,12 +1483,11 @@ static struct inode *jfs_nfs_get_inode(struct super_block *sb, if (ino == 0) return 
ERR_PTR(-ESTALE); - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); + inode = jfs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -1521,17 +1518,14 @@ struct dentry *jfs_get_parent(struct dentry *dentry) parent_ino = le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); - inode = iget(sb, parent_ino); - if (inode) { - if (is_bad_inode(inode)) { + inode = jfs_iget(sb, parent_ino); + if (IS_ERR(inode)) { + parent = ERR_CAST(inode); + } else { + parent = d_alloc_anon(inode); + if (!parent) { + parent = ERR_PTR(-ENOMEM); iput(inode); - parent = ERR_PTR(-EACCES); - } else { - parent = d_alloc_anon(inode); - if (!parent) { - parent = ERR_PTR(-ENOMEM); - iput(inode); - } } } @@ -1562,7 +1556,10 @@ const struct file_operations jfs_dir_operations = { .read = generic_read_dir, .readdir = jfs_readdir, .fsync = jfs_fsync, - .ioctl = jfs_ioctl, + .unlocked_ioctl = jfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = jfs_compat_ioctl, +#endif }; static int jfs_ci_hash(struct dentry *dir, struct qstr *this) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 70a14001c98f..50ea65451732 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -414,7 +414,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; int rc; s64 newLVSize = 0; - int flag; + int flag, ret = -EINVAL; jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); @@ -461,8 +461,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) * Initialize direct-mapping inode/address-space */ inode = new_inode(sb); - if (inode == NULL) + if (inode == NULL) { + ret = -ENOMEM; goto out_kfree; + } inode->i_ino = 0; inode->i_nlink = 1; inode->i_size = sb->s_bdev->bd_inode->i_size; @@ -494,9 +496,11 @@ static int jfs_fill_super(struct 
super_block *sb, void *data, int silent) sb->s_magic = JFS_SUPER_MAGIC; - inode = iget(sb, ROOT_I); - if (!inode || is_bad_inode(inode)) + inode = jfs_iget(sb, ROOT_I); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto out_no_root; + } sb->s_root = d_alloc_root(inode); if (!sb->s_root) goto out_no_root; @@ -536,7 +540,7 @@ out_kfree: if (sbi->nls_tab) unload_nls(sbi->nls_tab); kfree(sbi); - return -EINVAL; + return ret; } static void jfs_write_super_lockfs(struct super_block *sb) @@ -726,7 +730,6 @@ out: static const struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, - .read_inode = jfs_read_inode, .dirty_inode = jfs_dirty_inode, .write_inode = jfs_write_inode, .delete_inode = jfs_delete_inode, diff --git a/fs/libfs.c b/fs/libfs.c index 5523bde96387..b004dfadd891 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -583,8 +583,8 @@ int simple_transaction_release(struct inode *inode, struct file *file) /* Simple attribute files */ struct simple_attr { - u64 (*get)(void *); - void (*set)(void *, u64); + int (*get)(void *, u64 *); + int (*set)(void *, u64); char get_buf[24]; /* enough to store a u64 and "\n\0" */ char set_buf[24]; void *data; @@ -595,7 +595,7 @@ struct simple_attr { /* simple_attr_open is called by an actual attribute open file operation * to set the attribute specific access operations. 
*/ int simple_attr_open(struct inode *inode, struct file *file, - u64 (*get)(void *), void (*set)(void *, u64), + int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt) { struct simple_attr *attr; @@ -615,7 +615,7 @@ int simple_attr_open(struct inode *inode, struct file *file, return nonseekable_open(inode, file); } -int simple_attr_close(struct inode *inode, struct file *file) +int simple_attr_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; @@ -634,15 +634,24 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, if (!attr->get) return -EACCES; - mutex_lock(&attr->mutex); - if (*ppos) /* continued read */ + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + if (*ppos) { /* continued read */ size = strlen(attr->get_buf); - else /* first read */ + } else { /* first read */ + u64 val; + ret = attr->get(attr->data, &val); + if (ret) + goto out; + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), - attr->fmt, - (unsigned long long)attr->get(attr->data)); + attr->fmt, (unsigned long long)val); + } ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); +out: mutex_unlock(&attr->mutex); return ret; } @@ -657,11 +666,13 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, ssize_t ret; attr = file->private_data; - if (!attr->set) return -EACCES; - mutex_lock(&attr->mutex); + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + ret = -EFAULT; size = min(sizeof(attr->set_buf) - 1, len); if (copy_from_user(attr->set_buf, buf, size)) @@ -793,6 +804,6 @@ EXPORT_SYMBOL(simple_transaction_get); EXPORT_SYMBOL(simple_transaction_read); EXPORT_SYMBOL(simple_transaction_release); EXPORT_SYMBOL_GPL(simple_attr_open); -EXPORT_SYMBOL_GPL(simple_attr_close); +EXPORT_SYMBOL_GPL(simple_attr_release); EXPORT_SYMBOL_GPL(simple_attr_read); EXPORT_SYMBOL_GPL(simple_attr_write); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 
ca6b16fc3101..f1ef49fff118 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -243,10 +243,18 @@ nlm_bind_host(struct nlm_host *host) .program = &nlm_program, .version = host->h_version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_HARDRTRY | + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), }; + /* + * lockd retries server side blocks automatically so we want + * those to be soft RPC calls. Client side calls need to be + * hard RPC tasks. + */ + if (!host->h_server) + args.flags |= RPC_CLNT_CREATE_HARDRTRY; + clnt = rpc_create(&args); if (!IS_ERR(clnt)) host->h_rpcclnt = clnt; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 2f4d8fa66689..fe9bdb4a220c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -763,11 +763,20 @@ callback: dprintk("lockd: GRANTing blocked lock.\n"); block->b_granted = 1; - /* Schedule next grant callback in 30 seconds */ - nlmsvc_insert_block(block, 30 * HZ); + /* keep block on the list, but don't reattempt until the RPC + * completes or the submission fails + */ + nlmsvc_insert_block(block, NLM_NEVER); + + /* Call the client -- use a soft RPC task since nlmsvc_retry_blocked + * will queue up a new one if this one times out + */ + error = nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, + &nlmsvc_grant_ops); - /* Call the client */ - nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops); + /* RPC submission failed, wait a bit and retry */ + if (error < 0) + nlmsvc_insert_block(block, 10 * HZ); } /* @@ -786,6 +795,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); + /* if the block is not on a list at this point then it has + * been invalidated. Don't try to requeue it. + * + * FIXME: it's possible that the block is removed from the list + * after this check but before the nlmsvc_insert_block. In that + * case it will be added back. Perhaps we need better locking + * for nlm_blocked? 
+ */ + if (list_empty(&block->b_list)) + return; + /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm * can be done, though. */ diff --git a/fs/locks.c b/fs/locks.c index 49354b9c7dc1..f36f0e61558d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -658,8 +658,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) if (cfl) { __locks_copy_lock(fl, cfl); if (cfl->fl_nspid) - fl->fl_pid = pid_nr_ns(cfl->fl_nspid, - task_active_pid_ns(current)); + fl->fl_pid = pid_vnr(cfl->fl_nspid); } else fl->fl_type = F_UNLCK; unlock_kernel(); @@ -2084,7 +2083,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, unsigned int fl_pid; if (fl->fl_nspid) - fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current)); + fl_pid = pid_vnr(fl->fl_nspid); else fl_pid = fl->fl_pid; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index bf4cd316af81..84f6242ba6fc 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -18,7 +18,6 @@ #include <linux/highuid.h> #include <linux/vfs.h> -static void minix_read_inode(struct inode * inode); static int minix_write_inode(struct inode * inode, int wait); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -96,7 +95,6 @@ static void destroy_inodecache(void) static const struct super_operations minix_sops = { .alloc_inode = minix_alloc_inode, .destroy_inode = minix_destroy_inode, - .read_inode = minix_read_inode, .write_inode = minix_write_inode, .delete_inode = minix_delete_inode, .put_super = minix_put_super, @@ -149,6 +147,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) unsigned long i, block; struct inode *root_inode; struct minix_sb_info *sbi; + int ret = -EINVAL; sbi = kzalloc(sizeof(struct minix_sb_info), GFP_KERNEL); if (!sbi) @@ -246,10 +245,13 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) 
/* set up enough so that it can read an inode */ s->s_op = &minix_sops; - root_inode = iget(s, MINIX_ROOT_INO); - if (!root_inode || is_bad_inode(root_inode)) + root_inode = minix_iget(s, MINIX_ROOT_INO); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); goto out_no_root; + } + ret = -ENOMEM; s->s_root = d_alloc_root(root_inode); if (!s->s_root) goto out_iput; @@ -290,6 +292,7 @@ out_freemap: goto out_release; out_no_map: + ret = -ENOMEM; if (!silent) printk("MINIX-fs: can't allocate map\n"); goto out_release; @@ -316,7 +319,7 @@ out_bad_sb: out: s->s_fs_info = NULL; kfree(sbi); - return -EINVAL; + return ret; } static int minix_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -409,7 +412,7 @@ void minix_set_inode(struct inode *inode, dev_t rdev) /* * The minix V1 function to read an inode. */ -static void V1_minix_read_inode(struct inode * inode) +static struct inode *V1_minix_iget(struct inode *inode) { struct buffer_head * bh; struct minix_inode * raw_inode; @@ -418,8 +421,8 @@ static void V1_minix_read_inode(struct inode * inode) raw_inode = minix_V1_raw_inode(inode->i_sb, inode->i_ino, &bh); if (!raw_inode) { - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } inode->i_mode = raw_inode->i_mode; inode->i_uid = (uid_t)raw_inode->i_uid; @@ -435,12 +438,14 @@ static void V1_minix_read_inode(struct inode * inode) minix_inode->u.i1_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); brelse(bh); + unlock_new_inode(inode); + return inode; } /* * The minix V2 function to read an inode. 
*/ -static void V2_minix_read_inode(struct inode * inode) +static struct inode *V2_minix_iget(struct inode *inode) { struct buffer_head * bh; struct minix2_inode * raw_inode; @@ -449,8 +454,8 @@ static void V2_minix_read_inode(struct inode * inode) raw_inode = minix_V2_raw_inode(inode->i_sb, inode->i_ino, &bh); if (!raw_inode) { - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } inode->i_mode = raw_inode->i_mode; inode->i_uid = (uid_t)raw_inode->i_uid; @@ -468,17 +473,27 @@ static void V2_minix_read_inode(struct inode * inode) minix_inode->u.i2_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); brelse(bh); + unlock_new_inode(inode); + return inode; } /* * The global function to read an inode. */ -static void minix_read_inode(struct inode * inode) +struct inode *minix_iget(struct super_block *sb, unsigned long ino) { + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + if (INODE_VERSION(inode) == MINIX_V1) - V1_minix_read_inode(inode); + return V1_minix_iget(inode); else - V2_minix_read_inode(inode); + return V2_minix_iget(inode); } /* diff --git a/fs/minix/minix.h b/fs/minix/minix.h index ac5d3a75cb0d..326edfe96108 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -45,6 +45,7 @@ struct minix_sb_info { unsigned short s_version; }; +extern struct inode *minix_iget(struct super_block *, unsigned long); extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); extern struct inode * minix_new_inode(const struct inode * dir, int * error); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f4aa7a939040..102241bc9c79 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -54,10 +54,9 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry 
*dentry, st ino = minix_inode_by_name(dentry); if (ino) { - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); + inode = minix_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } d_add(dentry, inode); return NULL; diff --git a/fs/namei.c b/fs/namei.c index 73e2e665817a..941c8e8228c0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -106,7 +106,7 @@ * any extra contention... */ -static int fastcall link_path_walk(const char *name, struct nameidata *nd); +static int link_path_walk(const char *name, struct nameidata *nd); /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the @@ -231,7 +231,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) struct vfsmount *mnt = NULL; if (nd) - mnt = nd->mnt; + mnt = nd->path.mnt; if (mask & MAY_WRITE) { umode_t mode = inode->i_mode; @@ -296,7 +296,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) */ int vfs_permission(struct nameidata *nd, int mask) { - return permission(nd->dentry->d_inode, mask, nd); + return permission(nd->path.dentry->d_inode, mask, nd); } /** @@ -362,21 +362,31 @@ int deny_write_access(struct file * file) return 0; } -void path_release(struct nameidata *nd) +/** + * path_get - get a reference to a path + * @path: path to get the reference to + * + * Given a path increment the reference count to the dentry and the vfsmount. + */ +void path_get(struct path *path) { - dput(nd->dentry); - mntput(nd->mnt); + mntget(path->mnt); + dget(path->dentry); } +EXPORT_SYMBOL(path_get); -/* - * umount() mustn't call path_release()/mntput() as that would clear - * mnt_expiry_mark +/** + * path_put - put a reference to a path + * @path: path to put the reference to + * + * Given a path decrement the reference count to the dentry and the vfsmount. 
*/ -void path_release_on_umount(struct nameidata *nd) +void path_put(struct path *path) { - dput(nd->dentry); - mntput_no_expire(nd->mnt); + dput(path->dentry); + mntput(path->mnt); } +EXPORT_SYMBOL(path_put); /** * release_open_intent - free up open intent resources @@ -539,16 +549,16 @@ walk_init_root(const char *name, struct nameidata *nd) struct fs_struct *fs = current->fs; read_lock(&fs->lock); - if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(fs->altrootmnt); - nd->dentry = dget(fs->altroot); + if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { + nd->path = fs->altroot; + path_get(&fs->altroot); read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) return 0; read_lock(&fs->lock); } - nd->mnt = mntget(fs->rootmnt); - nd->dentry = dget(fs->root); + nd->path = fs->root; + path_get(&fs->root); read_unlock(&fs->lock); return 1; } @@ -561,7 +571,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { - path_release(nd); + path_put(&nd->path); if (!walk_init_root(link, nd)) /* weird __emul_prefix() stuff did it */ goto out; @@ -577,31 +587,31 @@ out: */ name = __getname(); if (unlikely(!name)) { - path_release(nd); + path_put(&nd->path); return -ENOMEM; } strcpy(name, nd->last.name); nd->last.name = name; return 0; fail: - path_release(nd); + path_put(&nd->path); return PTR_ERR(link); } -static inline void dput_path(struct path *path, struct nameidata *nd) +static void path_put_conditional(struct path *path, struct nameidata *nd) { dput(path->dentry); - if (path->mnt != nd->mnt) + if (path->mnt != nd->path.mnt) mntput(path->mnt); } static inline void path_to_nameidata(struct path *path, struct nameidata *nd) { - dput(nd->dentry); - if (nd->mnt != path->mnt) - mntput(nd->mnt); - nd->mnt = path->mnt; - nd->dentry = path->dentry; + dput(nd->path.dentry); + if (nd->path.mnt != path->mnt) + mntput(nd->path.mnt); + nd->path.mnt = path->mnt; + nd->path.dentry = path->dentry; } 
static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) @@ -613,7 +623,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata touch_atime(path->mnt, dentry); nd_set_link(nd, NULL); - if (path->mnt != nd->mnt) { + if (path->mnt != nd->path.mnt) { path_to_nameidata(path, nd); dget(dentry); } @@ -628,8 +638,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata if (dentry->d_inode->i_op->put_link) dentry->d_inode->i_op->put_link(dentry, nd, cookie); } - dput(dentry); - mntput(path->mnt); + path_put(path); return error; } @@ -661,8 +670,8 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd) nd->depth--; return err; loop: - dput_path(path, nd); - path_release(nd); + path_put_conditional(path, nd); + path_put(&nd->path); return err; } @@ -743,37 +752,37 @@ static __always_inline void follow_dotdot(struct nameidata *nd) while(1) { struct vfsmount *parent; - struct dentry *old = nd->dentry; + struct dentry *old = nd->path.dentry; read_lock(&fs->lock); - if (nd->dentry == fs->root && - nd->mnt == fs->rootmnt) { + if (nd->path.dentry == fs->root.dentry && + nd->path.mnt == fs->root.mnt) { read_unlock(&fs->lock); break; } read_unlock(&fs->lock); spin_lock(&dcache_lock); - if (nd->dentry != nd->mnt->mnt_root) { - nd->dentry = dget(nd->dentry->d_parent); + if (nd->path.dentry != nd->path.mnt->mnt_root) { + nd->path.dentry = dget(nd->path.dentry->d_parent); spin_unlock(&dcache_lock); dput(old); break; } spin_unlock(&dcache_lock); spin_lock(&vfsmount_lock); - parent = nd->mnt->mnt_parent; - if (parent == nd->mnt) { + parent = nd->path.mnt->mnt_parent; + if (parent == nd->path.mnt) { spin_unlock(&vfsmount_lock); break; } mntget(parent); - nd->dentry = dget(nd->mnt->mnt_mountpoint); + nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); spin_unlock(&vfsmount_lock); dput(old); - mntput(nd->mnt); - nd->mnt = parent; + mntput(nd->path.mnt); + nd->path.mnt = parent; } - 
follow_mount(&nd->mnt, &nd->dentry); + follow_mount(&nd->path.mnt, &nd->path.dentry); } /* @@ -784,8 +793,8 @@ static __always_inline void follow_dotdot(struct nameidata *nd) static int do_lookup(struct nameidata *nd, struct qstr *name, struct path *path) { - struct vfsmount *mnt = nd->mnt; - struct dentry *dentry = __d_lookup(nd->dentry, name); + struct vfsmount *mnt = nd->path.mnt; + struct dentry *dentry = __d_lookup(nd->path.dentry, name); if (!dentry) goto need_lookup; @@ -798,7 +807,7 @@ done: return 0; need_lookup: - dentry = real_lookup(nd->dentry, name, nd); + dentry = real_lookup(nd->path.dentry, name, nd); if (IS_ERR(dentry)) goto fail; goto done; @@ -823,7 +832,7 @@ fail: * Returns 0 and nd will have valid dentry and mnt on success. * Returns error and drops reference to input namei data on failure. */ -static fastcall int __link_path_walk(const char * name, struct nameidata *nd) +static int __link_path_walk(const char *name, struct nameidata *nd) { struct path next; struct inode *inode; @@ -835,7 +844,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) if (!*name) goto return_reval; - inode = nd->dentry->d_inode; + inode = nd->path.dentry->d_inode; if (nd->depth) lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); @@ -883,7 +892,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) if (this.name[1] != '.') break; follow_dotdot(nd); - inode = nd->dentry->d_inode; + inode = nd->path.dentry->d_inode; /* fallthrough */ case 1: continue; @@ -892,8 +901,9 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) * See if the low-level filesystem might want * to use its own hash.. 
*/ - if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { - err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { + err = nd->path.dentry->d_op->d_hash(nd->path.dentry, + &this); if (err < 0) break; } @@ -915,7 +925,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) if (err) goto return_err; err = -ENOENT; - inode = nd->dentry->d_inode; + inode = nd->path.dentry->d_inode; if (!inode) break; err = -ENOTDIR; @@ -943,13 +953,14 @@ last_component: if (this.name[1] != '.') break; follow_dotdot(nd); - inode = nd->dentry->d_inode; + inode = nd->path.dentry->d_inode; /* fallthrough */ case 1: goto return_reval; } - if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { - err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { + err = nd->path.dentry->d_op->d_hash(nd->path.dentry, + &this); if (err < 0) break; } @@ -962,7 +973,7 @@ last_component: err = do_follow_link(&next, nd); if (err) goto return_err; - inode = nd->dentry->d_inode; + inode = nd->path.dentry->d_inode; } else path_to_nameidata(&next, nd); err = -ENOENT; @@ -990,20 +1001,21 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. 
*/ - if (nd->dentry && nd->dentry->d_sb && - (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (nd->path.dentry && nd->path.dentry->d_sb && + (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { err = -ESTALE; /* Note: we do not d_invalidate() */ - if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) + if (!nd->path.dentry->d_op->d_revalidate( + nd->path.dentry, nd)) break; } return_base: return 0; out_dput: - dput_path(&next, nd); + path_put_conditional(&next, nd); break; } - path_release(nd); + path_put(&nd->path); return_err: return err; } @@ -1015,31 +1027,30 @@ return_err: * Retry the whole path once, forcing real lookup requests * instead of relying on the dcache. */ -static int fastcall link_path_walk(const char *name, struct nameidata *nd) +static int link_path_walk(const char *name, struct nameidata *nd) { struct nameidata save = *nd; int result; /* make sure the stuff we saved doesn't go away */ - dget(save.dentry); - mntget(save.mnt); + dget(save.path.dentry); + mntget(save.path.mnt); result = __link_path_walk(name, nd); if (result == -ESTALE) { *nd = save; - dget(nd->dentry); - mntget(nd->mnt); + dget(nd->path.dentry); + mntget(nd->path.mnt); nd->flags |= LOOKUP_REVAL; result = __link_path_walk(name, nd); } - dput(save.dentry); - mntput(save.mnt); + path_put(&save.path); return result; } -static int fastcall path_walk(const char * name, struct nameidata *nd) +static int path_walk(const char *name, struct nameidata *nd) { current->total_link_count = 0; return link_path_walk(name, nd); @@ -1054,9 +1065,9 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) if (path_walk(name, nd)) return 0; /* something went wrong... 
*/ - if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { - struct dentry *old_dentry = nd->dentry; - struct vfsmount *old_mnt = nd->mnt; + if (!nd->path.dentry->d_inode || + S_ISDIR(nd->path.dentry->d_inode->i_mode)) { + struct path old_path = nd->path; struct qstr last = nd->last; int last_type = nd->last_type; struct fs_struct *fs = current->fs; @@ -1067,19 +1078,17 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) */ nd->last_type = LAST_ROOT; read_lock(&fs->lock); - nd->mnt = mntget(fs->rootmnt); - nd->dentry = dget(fs->root); + nd->path = fs->root; + path_get(&fs->root); read_unlock(&fs->lock); if (path_walk(name, nd) == 0) { - if (nd->dentry->d_inode) { - dput(old_dentry); - mntput(old_mnt); + if (nd->path.dentry->d_inode) { + path_put(&old_path); return 1; } - path_release(nd); + path_put(&nd->path); } - nd->dentry = old_dentry; - nd->mnt = old_mnt; + nd->path = old_path; nd->last = last; nd->last_type = last_type; } @@ -1090,33 +1099,26 @@ void set_fs_altroot(void) { char *emul = __emul_prefix(); struct nameidata nd; - struct vfsmount *mnt = NULL, *oldmnt; - struct dentry *dentry = NULL, *olddentry; + struct path path = {}, old_path; int err; struct fs_struct *fs = current->fs; if (!emul) goto set_it; err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); - if (!err) { - mnt = nd.mnt; - dentry = nd.dentry; - } + if (!err) + path = nd.path; set_it: write_lock(&fs->lock); - oldmnt = fs->altrootmnt; - olddentry = fs->altroot; - fs->altrootmnt = mnt; - fs->altroot = dentry; + old_path = fs->altroot; + fs->altroot = path; write_unlock(&fs->lock); - if (olddentry) { - dput(olddentry); - mntput(oldmnt); - } + if (old_path.dentry) + path_put(&old_path); } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. 
*/ -static int fastcall do_path_lookup(int dfd, const char *name, +static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1130,21 +1132,21 @@ static int fastcall do_path_lookup(int dfd, const char *name, if (*name=='/') { read_lock(&fs->lock); - if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(fs->altrootmnt); - nd->dentry = dget(fs->altroot); + if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { + nd->path = fs->altroot; + path_get(&fs->altroot); read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) goto out; /* found in altroot */ read_lock(&fs->lock); } - nd->mnt = mntget(fs->rootmnt); - nd->dentry = dget(fs->root); + nd->path = fs->root; + path_get(&fs->root); read_unlock(&fs->lock); } else if (dfd == AT_FDCWD) { read_lock(&fs->lock); - nd->mnt = mntget(fs->pwdmnt); - nd->dentry = dget(fs->pwd); + nd->path = fs->pwd; + path_get(&fs->pwd); read_unlock(&fs->lock); } else { struct dentry *dentry; @@ -1164,17 +1166,17 @@ static int fastcall do_path_lookup(int dfd, const char *name, if (retval) goto fput_fail; - nd->mnt = mntget(file->f_path.mnt); - nd->dentry = dget(dentry); + nd->path = file->f_path; + path_get(&file->f_path); fput_light(file, fput_needed); } retval = path_walk(name, nd); out: - if (unlikely(!retval && !audit_dummy_context() && nd->dentry && - nd->dentry->d_inode)) - audit_inode(name, nd->dentry); + if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && + nd->path.dentry->d_inode)) + audit_inode(name, nd->path.dentry); out_fail: return retval; @@ -1183,7 +1185,7 @@ fput_fail: goto out_fail; } -int fastcall path_lookup(const char *name, unsigned int flags, +int path_lookup(const char *name, unsigned int flags, struct nameidata *nd) { return do_path_lookup(AT_FDCWD, name, flags, nd); @@ -1208,13 +1210,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->flags = flags; nd->depth = 0; - nd->mnt = mntget(mnt); - nd->dentry = 
dget(dentry); + nd->path.mnt = mntget(mnt); + nd->path.dentry = dget(dentry); retval = path_walk(name, nd); - if (unlikely(!retval && !audit_dummy_context() && nd->dentry && - nd->dentry->d_inode)) - audit_inode(name, nd->dentry); + if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && + nd->path.dentry->d_inode)) + audit_inode(name, nd->path.dentry); return retval; @@ -1236,7 +1238,7 @@ static int __path_lookup_intent_open(int dfd, const char *name, if (IS_ERR(nd->intent.open.file)) { if (err == 0) { err = PTR_ERR(nd->intent.open.file); - path_release(nd); + path_put(&nd->path); } } else if (err != 0) release_open_intent(nd); @@ -1333,10 +1335,10 @@ static struct dentry *lookup_hash(struct nameidata *nd) { int err; - err = permission(nd->dentry->d_inode, MAY_EXEC, nd); + err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); if (err) return ERR_PTR(err); - return __lookup_hash(&nd->last, nd->dentry, nd); + return __lookup_hash(&nd->last, nd->path.dentry, nd); } static int __lookup_one_len(const char *name, struct qstr *this, @@ -1409,7 +1411,7 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base) return __lookup_hash(&this, base, NULL); } -int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, +int __user_walk_fd(int dfd, const char __user *name, unsigned flags, struct nameidata *nd) { char *tmp = getname(name); @@ -1422,7 +1424,7 @@ int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, return err; } -int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) { return __user_walk_fd(AT_FDCWD, name, flags, nd); } @@ -1595,7 +1597,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, int may_open(struct nameidata *nd, int acc_mode, int flag) { - struct dentry *dentry = nd->dentry; + struct dentry *dentry = nd->path.dentry; struct inode *inode = 
dentry->d_inode; int error; @@ -1616,7 +1618,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { flag &= ~O_TRUNC; } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { - if (nd->mnt->mnt_flags & MNT_NODEV) + if (nd->path.mnt->mnt_flags & MNT_NODEV) return -EACCES; flag &= ~O_TRUNC; @@ -1678,14 +1680,14 @@ static int open_namei_create(struct nameidata *nd, struct path *path, int flag, int mode) { int error; - struct dentry *dir = nd->dentry; + struct dentry *dir = nd->path.dentry; if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; error = vfs_create(dir->d_inode, path->dentry, mode, nd); mutex_unlock(&dir->d_inode->i_mutex); - dput(nd->dentry); - nd->dentry = path->dentry; + dput(nd->path.dentry); + nd->path.dentry = path->dentry; if (error) return error; /* Don't check for write permission, don't truncate */ @@ -1752,11 +1754,11 @@ int open_namei(int dfd, const char *pathname, int flag, if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) goto exit; - dir = nd->dentry; + dir = nd->path.dentry; nd->flags &= ~LOOKUP_PARENT; mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(nd); - path.mnt = nd->mnt; + path.mnt = nd->path.mnt; do_last: error = PTR_ERR(path.dentry); @@ -1812,11 +1814,11 @@ ok: return 0; exit_dput: - dput_path(&path, nd); + path_put_conditional(&path, nd); exit: if (!IS_ERR(nd->intent.open.file)) release_open_intent(nd); - path_release(nd); + path_put(&nd->path); return error; do_link: @@ -1861,10 +1863,10 @@ do_link: __putname(nd->last.name); goto exit; } - dir = nd->dentry; + dir = nd->path.dentry; mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(nd); - path.mnt = nd->mnt; + path.mnt = nd->path.mnt; __putname(nd->last.name); goto do_last; } @@ -1877,13 +1879,13 @@ do_link: * Simple function to lookup and return a dentry and create it * if it doesn't exist. Is SMP-safe. * - * Returns with nd->dentry->d_inode->i_mutex locked. 
+ * Returns with nd->path.dentry->d_inode->i_mutex locked. */ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry = ERR_PTR(-EEXIST); - mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) @@ -1962,19 +1964,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); - if (!IS_POSIXACL(nd.dentry->d_inode)) + if (!IS_POSIXACL(nd.path.dentry->d_inode)) mode &= ~current->fs->umask; if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); + error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode, + error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); + error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); break; case S_IFDIR: error = -EPERM; @@ -1984,8 +1986,8 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, } dput(dentry); } - mutex_unlock(&nd.dentry->d_inode->i_mutex); - path_release(&nd); + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); out: putname(tmp); @@ -2039,13 +2041,13 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) if (IS_ERR(dentry)) goto out_unlock; - if (!IS_POSIXACL(nd.dentry->d_inode)) + if (!IS_POSIXACL(nd.path.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); dput(dentry); out_unlock: - mutex_unlock(&nd.dentry->d_inode->i_mutex); - path_release(&nd); + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + 
path_put(&nd.path); out: putname(tmp); out_err: @@ -2143,17 +2145,17 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -EBUSY; goto exit1; } - mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; - error = vfs_rmdir(nd.dentry->d_inode, dentry); + error = vfs_rmdir(nd.path.dentry->d_inode, dentry); dput(dentry); exit2: - mutex_unlock(&nd.dentry->d_inode->i_mutex); + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); exit1: - path_release(&nd); + path_put(&nd.path); exit: putname(name); return error; @@ -2188,6 +2190,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + fsnotify_link_count(dentry->d_inode); d_delete(dentry); } @@ -2218,7 +2221,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; - mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -2228,15 +2231,15 @@ static long do_unlinkat(int dfd, const char __user *pathname) inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + error = vfs_unlink(nd.path.dentry->d_inode, dentry); exit2: dput(dentry); } - mutex_unlock(&nd.dentry->d_inode->i_mutex); + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ exit1: - path_release(&nd); + path_put(&nd.path); exit: putname(name); return error; @@ -2309,11 +2312,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname, if (IS_ERR(dentry)) goto out_unlock; - error = vfs_symlink(nd.dentry->d_inode, dentry, 
from, S_IALLUGO); + error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); dput(dentry); out_unlock: - mutex_unlock(&nd.dentry->d_inode->i_mutex); - path_release(&nd); + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); out: putname(to); out_putname: @@ -2360,7 +2363,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&old_dentry->d_inode->i_mutex); if (!error) - fsnotify_create(dir, new_dentry); + fsnotify_link(dir, old_dentry->d_inode, new_dentry); return error; } @@ -2398,20 +2401,20 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, if (error) goto out; error = -EXDEV; - if (old_nd.mnt != nd.mnt) + if (old_nd.path.mnt != nd.path.mnt) goto out_release; new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_unlock; - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); dput(new_dentry); out_unlock: - mutex_unlock(&nd.dentry->d_inode->i_mutex); + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); out_release: - path_release(&nd); + path_put(&nd.path); out: - path_release(&old_nd); + path_put(&old_nd.path); exit: putname(to); @@ -2587,15 +2590,15 @@ static int do_rename(int olddfd, const char *oldname, goto exit1; error = -EXDEV; - if (oldnd.mnt != newnd.mnt) + if (oldnd.path.mnt != newnd.path.mnt) goto exit2; - old_dir = oldnd.dentry; + old_dir = oldnd.path.dentry; error = -EBUSY; if (oldnd.last_type != LAST_NORM) goto exit2; - new_dir = newnd.dentry; + new_dir = newnd.path.dentry; if (newnd.last_type != LAST_NORM) goto exit2; @@ -2639,9 +2642,9 @@ exit4: exit3: unlock_rename(new_dir, old_dir); exit2: - path_release(&newnd); + path_put(&newnd.path); exit1: - path_release(&oldnd); + path_put(&oldnd.path); exit: return error; } @@ -2815,7 +2818,6 @@ EXPORT_SYMBOL(page_symlink); 
EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(path_lookup); EXPORT_SYMBOL(vfs_path_lookup); -EXPORT_SYMBOL(path_release); EXPORT_SYMBOL(permission); EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); diff --git a/fs/namespace.c b/fs/namespace.c index 61bf376e29e8..7953c96a2071 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -25,18 +25,21 @@ #include <linux/security.h> #include <linux/mount.h> #include <linux/ramfs.h> +#include <linux/log2.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include "pnode.h" #include "internal.h" +#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) +#define HASH_SIZE (1UL << HASH_SHIFT) + /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; static struct list_head *mount_hashtable __read_mostly; -static int hash_mask __read_mostly, hash_bits __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; @@ -48,8 +51,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); - tmp = tmp + (tmp >> hash_bits); - return tmp & hash_mask; + tmp = tmp + (tmp >> HASH_SHIFT); + return tmp & (HASH_SIZE - 1); } struct vfsmount *alloc_vfsmnt(const char *name) @@ -154,13 +157,13 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; + old_nd->path.dentry = mnt->mnt_mountpoint; + old_nd->path.mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - old_nd->dentry->d_mounted--; + old_nd->path.dentry->d_mounted--; } void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, @@ -173,10 
+176,10 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) { - mnt_set_mountpoint(nd->mnt, nd->dentry, mnt); + mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt); list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(nd->mnt, nd->dentry)); - list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); + hash(nd->path.mnt, nd->path.dentry)); + list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts); } /* @@ -317,6 +320,50 @@ void mnt_unpin(struct vfsmount *mnt) EXPORT_SYMBOL(mnt_unpin); +static inline void mangle(struct seq_file *m, const char *s) +{ + seq_escape(m, s, " \t\n\\"); +} + +/* + * Simple .show_options callback for filesystems which don't want to + * implement more complex mount option showing. + * + * See also save_mount_options(). + */ +int generic_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + const char *options = mnt->mnt_sb->s_options; + + if (options != NULL && options[0]) { + seq_putc(m, ','); + mangle(m, options); + } + + return 0; +} +EXPORT_SYMBOL(generic_show_options); + +/* + * If filesystem uses generic_show_options(), this function should be + * called from the fill_super() callback. + * + * The .remount_fs callback usually needs to be handled in a special + * way, to make sure, that previous options are not overwritten if the + * remount fails. + * + * Also note, that if the filesystem's .remount_fs function doesn't + * reset all options to their default value, but changes only newly + * given options, then the displayed options will not reflect reality + * any more. 
+ */ +void save_mount_options(struct super_block *sb, char *options) +{ + kfree(sb->s_options); + sb->s_options = kstrdup(options, GFP_KERNEL); +} +EXPORT_SYMBOL(save_mount_options); + /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) { @@ -338,11 +385,6 @@ static void m_stop(struct seq_file *m, void *v) up_read(&namespace_sem); } -static inline void mangle(struct seq_file *m, const char *s) -{ - seq_escape(m, s, " \t\n\\"); -} - static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); @@ -366,10 +408,11 @@ static int show_vfsmnt(struct seq_file *m, void *v) { 0, NULL } }; struct proc_fs_info *fs_infop; + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_path(m, &mnt_path, " \t\n\\"); seq_putc(m, ' '); mangle(m, mnt->mnt_sb->s_type->name); if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) { @@ -401,6 +444,7 @@ struct seq_operations mounts_op = { static int show_vfsstat(struct seq_file *m, void *v) { struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; int err = 0; /* device */ @@ -412,7 +456,7 @@ static int show_vfsstat(struct seq_file *m, void *v) /* mount point */ seq_puts(m, " mounted on "); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_path(m, &mnt_path, " \t\n\\"); seq_putc(m, ' '); /* file system type */ @@ -551,7 +595,7 @@ static int do_umount(struct vfsmount *mnt, int flags) * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] */ if (flags & MNT_EXPIRE) { - if (mnt == current->fs->rootmnt || + if (mnt == current->fs->root.mnt || flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; @@ -586,7 +630,7 @@ static int do_umount(struct vfsmount *mnt, int flags) * /reboot - static binary that would close all descriptors and * call reboot(9). 
Then init(8) could umount root and exec /reboot. */ - if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) { + if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { /* * Special case for "unmounting" root ... * we just try to remount it readonly. @@ -637,18 +681,20 @@ asmlinkage long sys_umount(char __user * name, int flags) if (retval) goto out; retval = -EINVAL; - if (nd.dentry != nd.mnt->mnt_root) + if (nd.path.dentry != nd.path.mnt->mnt_root) goto dput_and_out; - if (!check_mnt(nd.mnt)) + if (!check_mnt(nd.path.mnt)) goto dput_and_out; retval = -EPERM; if (!capable(CAP_SYS_ADMIN)) goto dput_and_out; - retval = do_umount(nd.mnt, flags); + retval = do_umount(nd.path.mnt, flags); dput_and_out: - path_release_on_umount(&nd); + /* we mustn't call path_put() as that would clear mnt_expiry_mark */ + dput(nd.path.dentry); + mntput_no_expire(nd.path.mnt); out: return retval; } @@ -671,10 +717,10 @@ static int mount_is_safe(struct nameidata *nd) return 0; return -EPERM; #ifdef notyet - if (S_ISLNK(nd->dentry->d_inode->i_mode)) + if (S_ISLNK(nd->path.dentry->d_inode->i_mode)) return -EPERM; - if (nd->dentry->d_inode->i_mode & S_ISVTX) { - if (current->uid != nd->dentry->d_inode->i_uid) + if (nd->path.dentry->d_inode->i_mode & S_ISVTX) { + if (current->uid != nd->path.dentry->d_inode->i_uid) return -EPERM; } if (vfs_permission(nd, MAY_WRITE)) @@ -723,8 +769,8 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = q->mnt_parent; } p = s; - nd.mnt = q; - nd.dentry = p->mnt_mountpoint; + nd.path.mnt = q; + nd.path.dentry = p->mnt_mountpoint; q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; @@ -833,8 +879,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, struct nameidata *nd, struct nameidata *parent_nd) { LIST_HEAD(tree_list); - struct vfsmount *dest_mnt = nd->mnt; - struct dentry *dest_dentry = nd->dentry; + struct vfsmount *dest_mnt = nd->path.mnt; + struct dentry *dest_dentry = nd->path.dentry; struct vfsmount 
*child, *p; if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) @@ -869,13 +915,13 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) if (mnt->mnt_sb->s_flags & MS_NOUSER) return -EINVAL; - if (S_ISDIR(nd->dentry->d_inode->i_mode) != + if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != S_ISDIR(mnt->mnt_root->d_inode->i_mode)) return -ENOTDIR; err = -ENOENT; - mutex_lock(&nd->dentry->d_inode->i_mutex); - if (IS_DEADDIR(nd->dentry->d_inode)) + mutex_lock(&nd->path.dentry->d_inode->i_mutex); + if (IS_DEADDIR(nd->path.dentry->d_inode)) goto out_unlock; err = security_sb_check_sb(mnt, nd); @@ -883,10 +929,10 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) goto out_unlock; err = -ENOENT; - if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) + if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) err = attach_recursive_mnt(mnt, nd, NULL); out_unlock: - mutex_unlock(&nd->dentry->d_inode->i_mutex); + mutex_unlock(&nd->path.dentry->d_inode->i_mutex); if (!err) security_sb_post_addmount(mnt, nd); return err; @@ -894,17 +940,18 @@ out_unlock: /* * recursively change the type of the mountpoint. + * noinline this do_mount helper to save do_mount stack space. */ -static int do_change_type(struct nameidata *nd, int flag) +static noinline int do_change_type(struct nameidata *nd, int flag) { - struct vfsmount *m, *mnt = nd->mnt; + struct vfsmount *m, *mnt = nd->path.mnt; int recurse = flag & MS_REC; int type = flag & ~MS_REC; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (nd->dentry != nd->mnt->mnt_root) + if (nd->path.dentry != nd->path.mnt->mnt_root) return -EINVAL; down_write(&namespace_sem); @@ -918,8 +965,10 @@ static int do_change_type(struct nameidata *nd, int flag) /* * do loopback mount. + * noinline this do_mount helper to save do_mount stack space. 
*/ -static int do_loopback(struct nameidata *nd, char *old_name, int recurse) +static noinline int do_loopback(struct nameidata *nd, char *old_name, + int recurse) { struct nameidata old_nd; struct vfsmount *mnt = NULL; @@ -934,17 +983,17 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) down_write(&namespace_sem); err = -EINVAL; - if (IS_MNT_UNBINDABLE(old_nd.mnt)) - goto out; + if (IS_MNT_UNBINDABLE(old_nd.path.mnt)) + goto out; - if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) + if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) goto out; err = -ENOMEM; if (recurse) - mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0); + mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0); else - mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0); + mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0); if (!mnt) goto out; @@ -960,7 +1009,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) out: up_write(&namespace_sem); - path_release(&old_nd); + path_put(&old_nd.path); return err; } @@ -968,29 +1017,30 @@ out: * change filesystem flags. dir should be a physical root of filesystem. * If you've mounted a non-root directory somewhere and want to do remount * on it - tough luck. + * noinline this do_mount helper to save do_mount stack space. 
*/ -static int do_remount(struct nameidata *nd, int flags, int mnt_flags, +static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, void *data) { int err; - struct super_block *sb = nd->mnt->mnt_sb; + struct super_block *sb = nd->path.mnt->mnt_sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!check_mnt(nd->mnt)) + if (!check_mnt(nd->path.mnt)) return -EINVAL; - if (nd->dentry != nd->mnt->mnt_root) + if (nd->path.dentry != nd->path.mnt->mnt_root) return -EINVAL; down_write(&sb->s_umount); err = do_remount_sb(sb, flags, data, 0); if (!err) - nd->mnt->mnt_flags = mnt_flags; + nd->path.mnt->mnt_flags = mnt_flags; up_write(&sb->s_umount); if (!err) - security_sb_post_remount(nd->mnt, flags, data); + security_sb_post_remount(nd->path.mnt, flags, data); return err; } @@ -1004,7 +1054,10 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt) return 0; } -static int do_move_mount(struct nameidata *nd, char *old_name) +/* + * noinline this do_mount helper to save do_mount stack space. 
+ */ +static noinline int do_move_mount(struct nameidata *nd, char *old_name) { struct nameidata old_nd, parent_nd; struct vfsmount *p; @@ -1018,69 +1071,74 @@ static int do_move_mount(struct nameidata *nd, char *old_name) return err; down_write(&namespace_sem); - while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) ; err = -EINVAL; - if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) + if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) goto out; err = -ENOENT; - mutex_lock(&nd->dentry->d_inode->i_mutex); - if (IS_DEADDIR(nd->dentry->d_inode)) + mutex_lock(&nd->path.dentry->d_inode->i_mutex); + if (IS_DEADDIR(nd->path.dentry->d_inode)) goto out1; - if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) + if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry)) goto out1; err = -EINVAL; - if (old_nd.dentry != old_nd.mnt->mnt_root) + if (old_nd.path.dentry != old_nd.path.mnt->mnt_root) goto out1; - if (old_nd.mnt == old_nd.mnt->mnt_parent) + if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent) goto out1; - if (S_ISDIR(nd->dentry->d_inode->i_mode) != - S_ISDIR(old_nd.dentry->d_inode->i_mode)) + if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != + S_ISDIR(old_nd.path.dentry->d_inode->i_mode)) goto out1; /* * Don't move a mount residing in a shared parent. */ - if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent)) + if (old_nd.path.mnt->mnt_parent && + IS_MNT_SHARED(old_nd.path.mnt->mnt_parent)) goto out1; /* * Don't move a mount tree containing unbindable mounts to a destination * mount which is shared. 
*/ - if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt)) + if (IS_MNT_SHARED(nd->path.mnt) && + tree_contains_unbindable(old_nd.path.mnt)) goto out1; err = -ELOOP; - for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent) - if (p == old_nd.mnt) + for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent) + if (p == old_nd.path.mnt) goto out1; - if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd))) + err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd); + if (err) goto out1; spin_lock(&vfsmount_lock); /* if the mount is moved, it should no longer be expire * automatically */ - list_del_init(&old_nd.mnt->mnt_expire); + list_del_init(&old_nd.path.mnt->mnt_expire); spin_unlock(&vfsmount_lock); out1: - mutex_unlock(&nd->dentry->d_inode->i_mutex); + mutex_unlock(&nd->path.dentry->d_inode->i_mutex); out: up_write(&namespace_sem); if (!err) - path_release(&parent_nd); - path_release(&old_nd); + path_put(&parent_nd.path); + path_put(&old_nd.path); return err; } /* * create a new mount for userspace and request it to be added into the * namespace's tree + * noinline this do_mount helper to save do_mount stack space. 
*/ -static int do_new_mount(struct nameidata *nd, char *type, int flags, +static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; @@ -1110,16 +1168,17 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, down_write(&namespace_sem); /* Something was mounted here while we slept */ - while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) ; err = -EINVAL; - if (!check_mnt(nd->mnt)) + if (!check_mnt(nd->path.mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->mnt->mnt_sb == newmnt->mnt_sb && - nd->mnt->mnt_root == nd->dentry) + if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && + nd->path.mnt->mnt_root == nd->path.dentry) goto unlock; err = -EINVAL; @@ -1455,7 +1514,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, retval = do_new_mount(&nd, type_page, flags, mnt_flags, dev_name, data_page); dput_out: - path_release(&nd); + path_put(&nd.path); return retval; } @@ -1502,17 +1561,17 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, while (p) { q->mnt_ns = new_ns; if (fs) { - if (p == fs->rootmnt) { + if (p == fs->root.mnt) { rootmnt = p; - fs->rootmnt = mntget(q); + fs->root.mnt = mntget(q); } - if (p == fs->pwdmnt) { + if (p == fs->pwd.mnt) { pwdmnt = p; - fs->pwdmnt = mntget(q); + fs->pwd.mnt = mntget(q); } - if (p == fs->altrootmnt) { + if (p == fs->altroot.mnt) { altrootmnt = p; - fs->altrootmnt = mntget(q); + fs->altroot.mnt = mntget(q); } } p = next_mnt(p, mnt_ns->root); @@ -1593,44 +1652,35 @@ out1: * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. 
*/ -void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt, - struct dentry *dentry) +void set_fs_root(struct fs_struct *fs, struct path *path) { - struct dentry *old_root; - struct vfsmount *old_rootmnt; + struct path old_root; + write_lock(&fs->lock); old_root = fs->root; - old_rootmnt = fs->rootmnt; - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); + fs->root = *path; + path_get(path); write_unlock(&fs->lock); - if (old_root) { - dput(old_root); - mntput(old_rootmnt); - } + if (old_root.dentry) + path_put(&old_root); } /* * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. */ -void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, - struct dentry *dentry) +void set_fs_pwd(struct fs_struct *fs, struct path *path) { - struct dentry *old_pwd; - struct vfsmount *old_pwdmnt; + struct path old_pwd; write_lock(&fs->lock); old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); + fs->pwd = *path; + path_get(path); write_unlock(&fs->lock); - if (old_pwd) { - dput(old_pwd); - mntput(old_pwdmnt); - } + if (old_pwd.dentry) + path_put(&old_pwd); } static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) @@ -1645,12 +1695,12 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) if (fs) { atomic_inc(&fs->count); task_unlock(p); - if (fs->root == old_nd->dentry - && fs->rootmnt == old_nd->mnt) - set_fs_root(fs, new_nd->mnt, new_nd->dentry); - if (fs->pwd == old_nd->dentry - && fs->pwdmnt == old_nd->mnt) - set_fs_pwd(fs, new_nd->mnt, new_nd->dentry); + if (fs->root.dentry == old_nd->path.dentry + && fs->root.mnt == old_nd->path.mnt) + set_fs_root(fs, &new_nd->path); + if (fs->pwd.dentry == old_nd->path.dentry + && fs->pwd.mnt == old_nd->path.mnt) + set_fs_pwd(fs, &new_nd->path); put_fs_struct(fs); } else task_unlock(p); @@ -1700,7 +1750,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, if 
(error) goto out0; error = -EINVAL; - if (!check_mnt(new_nd.mnt)) + if (!check_mnt(new_nd.path.mnt)) goto out1; error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd); @@ -1709,74 +1759,78 @@ asmlinkage long sys_pivot_root(const char __user * new_root, error = security_sb_pivotroot(&old_nd, &new_nd); if (error) { - path_release(&old_nd); + path_put(&old_nd.path); goto out1; } read_lock(&current->fs->lock); - user_nd.mnt = mntget(current->fs->rootmnt); - user_nd.dentry = dget(current->fs->root); + user_nd.path = current->fs->root; + path_get(&current->fs->root); read_unlock(&current->fs->lock); down_write(&namespace_sem); - mutex_lock(&old_nd.dentry->d_inode->i_mutex); + mutex_lock(&old_nd.path.dentry->d_inode->i_mutex); error = -EINVAL; - if (IS_MNT_SHARED(old_nd.mnt) || - IS_MNT_SHARED(new_nd.mnt->mnt_parent) || - IS_MNT_SHARED(user_nd.mnt->mnt_parent)) + if (IS_MNT_SHARED(old_nd.path.mnt) || + IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || + IS_MNT_SHARED(user_nd.path.mnt->mnt_parent)) goto out2; - if (!check_mnt(user_nd.mnt)) + if (!check_mnt(user_nd.path.mnt)) goto out2; error = -ENOENT; - if (IS_DEADDIR(new_nd.dentry->d_inode)) + if (IS_DEADDIR(new_nd.path.dentry->d_inode)) goto out2; - if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) + if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry)) goto out2; - if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) + if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) goto out2; error = -EBUSY; - if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt) + if (new_nd.path.mnt == user_nd.path.mnt || + old_nd.path.mnt == user_nd.path.mnt) goto out2; /* loop, on the same file system */ error = -EINVAL; - if (user_nd.mnt->mnt_root != user_nd.dentry) + if (user_nd.path.mnt->mnt_root != user_nd.path.dentry) goto out2; /* not a mountpoint */ - if (user_nd.mnt->mnt_parent == user_nd.mnt) + if (user_nd.path.mnt->mnt_parent == user_nd.path.mnt) goto out2; /* not attached */ - if 
(new_nd.mnt->mnt_root != new_nd.dentry) + if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) goto out2; /* not a mountpoint */ - if (new_nd.mnt->mnt_parent == new_nd.mnt) + if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt) goto out2; /* not attached */ - tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ + /* make sure we can reach put_old from new_root */ + tmp = old_nd.path.mnt; spin_lock(&vfsmount_lock); - if (tmp != new_nd.mnt) { + if (tmp != new_nd.path.mnt) { for (;;) { if (tmp->mnt_parent == tmp) goto out3; /* already mounted on put_old */ - if (tmp->mnt_parent == new_nd.mnt) + if (tmp->mnt_parent == new_nd.path.mnt) break; tmp = tmp->mnt_parent; } - if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry)) + if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry)) goto out3; - } else if (!is_subdir(old_nd.dentry, new_nd.dentry)) + } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) goto out3; - detach_mnt(new_nd.mnt, &parent_nd); - detach_mnt(user_nd.mnt, &root_parent); - attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ - attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ + detach_mnt(new_nd.path.mnt, &parent_nd); + detach_mnt(user_nd.path.mnt, &root_parent); + /* mount old root on put_old */ + attach_mnt(user_nd.path.mnt, &old_nd); + /* mount new_root on / */ + attach_mnt(new_nd.path.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); chroot_fs_refs(&user_nd, &new_nd); security_sb_post_pivotroot(&user_nd, &new_nd); error = 0; - path_release(&root_parent); - path_release(&parent_nd); + path_put(&root_parent.path); + path_put(&parent_nd.path); out2: - mutex_unlock(&old_nd.dentry->d_inode->i_mutex); + mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); up_write(&namespace_sem); - path_release(&user_nd); - path_release(&old_nd); + path_put(&user_nd.path); + path_put(&old_nd.path); out1: - path_release(&new_nd); + path_put(&new_nd.path); out0: unlock_kernel(); 
return error; @@ -1789,6 +1843,7 @@ static void __init init_mount_tree(void) { struct vfsmount *mnt; struct mnt_namespace *ns; + struct path root; mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) @@ -1807,15 +1862,16 @@ static void __init init_mount_tree(void) init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); - set_fs_pwd(current->fs, ns->root, ns->root->mnt_root); - set_fs_root(current->fs, ns->root, ns->root->mnt_root); + root.mnt = ns->root; + root.dentry = ns->root->mnt_root; + + set_fs_pwd(current->fs, &root); + set_fs_root(current->fs, &root); } void __init mnt_init(void) { - struct list_head *d; - unsigned int nr_hash; - int i; + unsigned u; int err; init_rwsem(&namespace_sem); @@ -1828,35 +1884,11 @@ void __init mnt_init(void) if (!mount_hashtable) panic("Failed to allocate mount hash table\n"); - /* - * Find the power-of-two list-heads that can fit into the allocation.. - * We don't guarantee that "sizeof(struct list_head)" is necessarily - * a power-of-two. - */ - nr_hash = PAGE_SIZE / sizeof(struct list_head); - hash_bits = 0; - do { - hash_bits++; - } while ((nr_hash >> hash_bits) != 0); - hash_bits--; + printk("Mount-cache hash table entries: %lu\n", HASH_SIZE); + + for (u = 0; u < HASH_SIZE; u++) + INIT_LIST_HEAD(&mount_hashtable[u]); - /* - * Re-calculate the actual number of entries and the mask - * from the number of bits we can fit. 
- */ - nr_hash = 1UL << hash_bits; - hash_mask = nr_hash - 1; - - printk("Mount-cache hash table entries: %d\n", nr_hash); - - /* And initialize the newly allocated array */ - d = mount_hashtable; - i = nr_hash; - do { - INIT_LIST_HEAD(d); - d++; - i--; - } while (i); err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index e1cb70c643f8..fbbb9f7afa1a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -28,6 +28,8 @@ #include <linux/init.h> #include <linux/smp_lock.h> #include <linux/vfs.h> +#include <linux/mount.h> +#include <linux/seq_file.h> #include <linux/ncp_fs.h> @@ -36,9 +38,15 @@ #include "ncplib_kernel.h" #include "getopt.h" +#define NCP_DEFAULT_FILE_MODE 0600 +#define NCP_DEFAULT_DIR_MODE 0700 +#define NCP_DEFAULT_TIME_OUT 10 +#define NCP_DEFAULT_RETRY_COUNT 20 + static void ncp_delete_inode(struct inode *); static void ncp_put_super(struct super_block *); static int ncp_statfs(struct dentry *, struct kstatfs *); +static int ncp_show_options(struct seq_file *, struct vfsmount *); static struct kmem_cache * ncp_inode_cachep; @@ -96,6 +104,7 @@ static const struct super_operations ncp_sops = .put_super = ncp_put_super, .statfs = ncp_statfs, .remount_fs = ncp_remount, + .show_options = ncp_show_options, }; extern struct dentry_operations ncp_root_dentry_operations; @@ -304,6 +313,37 @@ static void ncp_stop_tasks(struct ncp_server *server) { flush_scheduled_work(); } +static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt) +{ + struct ncp_server *server = NCP_SBP(mnt->mnt_sb); + unsigned int tmp; + + if (server->m.uid != 0) + seq_printf(seq, ",uid=%u", server->m.uid); + if (server->m.gid != 0) + seq_printf(seq, ",gid=%u", server->m.gid); + if (server->m.mounted_uid != 0) + seq_printf(seq, ",owner=%u", server->m.mounted_uid); + tmp = server->m.file_mode & S_IALLUGO; + if (tmp != NCP_DEFAULT_FILE_MODE) + seq_printf(seq, ",mode=0%o", tmp); + tmp = server->m.dir_mode 
& S_IALLUGO; + if (tmp != NCP_DEFAULT_DIR_MODE) + seq_printf(seq, ",dirmode=0%o", tmp); + if (server->m.time_out != NCP_DEFAULT_TIME_OUT * HZ / 100) { + tmp = server->m.time_out * 100 / HZ; + seq_printf(seq, ",timeout=%u", tmp); + } + if (server->m.retry_count != NCP_DEFAULT_RETRY_COUNT) + seq_printf(seq, ",retry=%u", server->m.retry_count); + if (server->m.flags != 0) + seq_printf(seq, ",flags=%lu", server->m.flags); + if (server->m.wdog_pid != NULL) + seq_printf(seq, ",wdogpid=%u", pid_vnr(server->m.wdog_pid)); + + return 0; +} + static const struct ncp_option ncp_opts[] = { { "uid", OPT_INT, 'u' }, { "gid", OPT_INT, 'g' }, @@ -331,12 +371,12 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options) data->mounted_uid = 0; data->wdog_pid = NULL; data->ncp_fd = ~0; - data->time_out = 10; - data->retry_count = 20; + data->time_out = NCP_DEFAULT_TIME_OUT; + data->retry_count = NCP_DEFAULT_RETRY_COUNT; data->uid = 0; data->gid = 0; - data->file_mode = 0600; - data->dir_mode = 0700; + data->file_mode = NCP_DEFAULT_FILE_MODE; + data->dir_mode = NCP_DEFAULT_DIR_MODE; data->info_fd = -1; data->mounted_vol[0] = 0; @@ -982,12 +1022,13 @@ static struct file_system_type ncp_fs_type = { .name = "ncpfs", .get_sb = ncp_get_sb, .kill_sb = kill_anon_super, + .fs_flags = FS_BINARY_MOUNTDATA, }; static int __init init_ncp_fs(void) { int err; - DPRINTK("ncpfs: init_module called\n"); + DPRINTK("ncpfs: init_ncp_fs called\n"); err = init_inodecache(); if (err) @@ -1004,7 +1045,7 @@ out1: static void __exit exit_ncp_fs(void) { - DPRINTK("ncpfs: cleanup_module called\n"); + DPRINTK("ncpfs: exit_ncp_fs called\n"); unregister_filesystem(&ncp_fs_type); destroy_inodecache(); } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index bd185a572a23..ecc06c619494 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -105,7 +105,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) */ int nfs_callback_up(void) { - struct svc_serv *serv; + struct svc_serv *serv = 
NULL; int ret = 0; lock_kernel(); @@ -122,24 +122,30 @@ int nfs_callback_up(void) ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) - goto out_destroy; + goto out_err; nfs_callback_tcpport = ret; dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); ret = svc_create_thread(nfs_callback_svc, serv); if (ret < 0) - goto out_destroy; + goto out_err; nfs_callback_info.serv = serv; wait_for_completion(&nfs_callback_info.started); out: + /* + * svc_create creates the svc_serv with sv_nrthreads == 1, and then + * svc_create_thread increments that. So we need to call svc_destroy + * on both success and failure so that the refcount is 1 when the + * thread exits. + */ + if (serv) + svc_destroy(serv); mutex_unlock(&nfs_callback_mutex); unlock_kernel(); return ret; -out_destroy: +out_err: dprintk("Couldn't create callback socket or server thread; err = %d\n", ret); - svc_destroy(serv); -out_err: nfs_callback_info.users--; goto out; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 476cb0f837fd..ae04892a5e5d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -154,7 +154,6 @@ typedef struct { struct nfs_entry *entry; decode_dirent_t decode; int plus; - int error; unsigned long timestamp; int timestamp_valid; } nfs_readdir_descriptor_t; @@ -213,7 +212,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) return 0; error: unlock_page(page); - desc->error = error; return -EIO; } @@ -483,13 +481,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } timestamp = jiffies; - desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, - page, + status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, + *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - if (desc->error >= 0) { + if (status >= 0) { desc->timestamp = timestamp; desc->timestamp_valid = 1; if 
((status = dir_decode(desc)) == 0) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index e6242cdbaf91..fae97196daad 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -96,7 +96,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) inode = nfs_fhget(sb, mntfh, fsinfo.fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } error = nfs_superblock_set_dummy_root(sb, inode); @@ -266,7 +266,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) inode = nfs_fhget(sb, mntfh, &fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } error = nfs_superblock_set_dummy_root(sb, inode); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index be4ce1c3a3d8..607f6eb9cdb5 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -107,38 +107,40 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) BUG_ON(IS_ROOT(dentry)); dprintk("%s: enter\n", __FUNCTION__); - dput(nd->dentry); - nd->dentry = dget(dentry); + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); /* Look it up again */ - parent = dget_parent(nd->dentry); + parent = dget_parent(nd->path.dentry); err = server->nfs_client->rpc_ops->lookup(parent->d_inode, - &nd->dentry->d_name, + &nd->path.dentry->d_name, &fh, &fattr); dput(parent); if (err != 0) goto out_err; if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) - mnt = nfs_do_refmount(nd->mnt, nd->dentry); + mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); else - mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh, + &fattr); err = PTR_ERR(mnt); if (IS_ERR(mnt)) goto out_err; mntget(mnt); - err = do_add_mount(mnt, nd, nd->mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list); + err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, + 
&nfs_automount_list); if (err < 0) { mntput(mnt); if (err == -EBUSY) goto out_follow; goto out_err; } - mntput(nd->mnt); - dput(nd->dentry); - nd->mnt = mnt; - nd->dentry = dget(mnt->mnt_root); + mntput(nd->path.mnt); + dput(nd->path.dentry); + nd->path.mnt = mnt; + nd->path.dentry = dget(mnt->mnt_root); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: dprintk("%s: done, returned %d\n", __FUNCTION__, err); @@ -146,10 +148,11 @@ out: dprintk("<-- nfs_follow_mountpoint() = %d\n", err); return ERR_PTR(err); out_err: - path_release(nd); + path_put(&nd->path); goto out; out_follow: - while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) ; err = 0; goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 027e1095256e..7ce07862c2fb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1384,11 +1384,11 @@ out_close: struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - struct dentry *parent; struct path path = { - .mnt = nd->mnt, + .mnt = nd->path.mnt, .dentry = dentry, }; + struct dentry *parent; struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; @@ -1433,7 +1433,7 @@ int nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { struct path path = { - .mnt = nd->mnt, + .mnt = nd->path.mnt, .dentry = dentry, }; struct rpc_cred *cred; @@ -1885,7 +1885,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { struct path path = { - .mnt = nd->mnt, + .mnt = nd->path.mnt, .dentry = dentry, }; struct nfs4_state *state; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f9c7432471dc..6233eb5e98c1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -682,8 +682,8 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) if 
(seqid->sequence->flags & NFS_SEQID_CONFIRMED) return; printk(KERN_WARNING "NFS: v4 server returned a bad" - "sequence-id error on an" - "unconfirmed sequence %p!\n", + " sequence-id error on an" + " unconfirmed sequence %p!\n", seqid->sequence); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7f4505f6ac6f..1fb381843650 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -190,6 +190,10 @@ static match_table_t nfs_secflavor_tokens = { { Opt_sec_lkeyi, "lkeyi" }, { Opt_sec_lkeyp, "lkeyp" }, + { Opt_sec_spkm, "spkm3" }, + { Opt_sec_spkmi, "spkm3i" }, + { Opt_sec_spkmp, "spkm3p" }, + { Opt_sec_err, NULL } }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b144b1957dd9..f55c437124a2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -697,6 +697,17 @@ int nfs_flush_incompatible(struct file *file, struct page *page) } /* + * If the page cache is marked as unsafe or invalid, then we can't rely on + * the PageUptodate() flag. In this case, we will need to turn off + * write optimisations that depend on the page contents being correct. + */ +static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +{ + return PageUptodate(page) && + !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); +} + +/* * Update and possibly write a cached page of an NFS file. * * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad @@ -717,10 +728,13 @@ int nfs_updatepage(struct file *file, struct page *page, (long long)(page_offset(page) +offset)); /* If we're not using byte range locks, and we know the page - * is entirely in cache, it may be more efficient to avoid - * fragmenting write requests. + * is up to date, it may be more efficient to extend the write + * to cover the entire page in order to avoid fragmentation + * inefficiencies. 
*/ - if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { + if (nfs_write_pageuptodate(page, inode) && + inode->i_flock == NULL && + !(file->f_mode & O_SYNC)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } diff --git a/fs/nfsctl.c b/fs/nfsctl.c index 51f1b31acbf6..aed8145d9087 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -41,9 +41,9 @@ static struct file *do_open(char *name, int flags) error = may_open(&nd, MAY_WRITE, FMODE_WRITE); if (!error) - return dentry_open(nd.dentry, nd.mnt, flags); + return dentry_open(nd.path.dentry, nd.path.mnt, flags); - path_release(&nd); + path_put(&nd.path); return ERR_PTR(error); } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 79b4bf812960..8a6f7c924c75 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -63,10 +63,8 @@ static void expkey_put(struct kref *ref) struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); if (test_bit(CACHE_VALID, &key->h.flags) && - !test_bit(CACHE_NEGATIVE, &key->h.flags)) { - dput(key->ek_dentry); - mntput(key->ek_mnt); - } + !test_bit(CACHE_NEGATIVE, &key->h.flags)) + path_put(&key->ek_path); auth_domain_put(key->ek_client); kfree(key); } @@ -169,15 +167,14 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) goto out; dprintk("Found the path %s\n", buf); - key.ek_mnt = nd.mnt; - key.ek_dentry = nd.dentry; - + key.ek_path = nd.path; + ek = svc_expkey_update(&key, ek); if (ek) cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; - path_release(&nd); + path_put(&nd.path); } cache_flush(); out: @@ -206,7 +203,7 @@ static int expkey_show(struct seq_file *m, if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { seq_printf(m, " "); - seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n"); + seq_path(m, &ek->ek_path, "\\ \t\n"); } seq_printf(m, "\n"); return 0; @@ -243,8 +240,8 @@ static inline void expkey_update(struct cache_head *cnew, struct svc_expkey *new = container_of(cnew, struct 
svc_expkey, h); struct svc_expkey *item = container_of(citem, struct svc_expkey, h); - new->ek_mnt = mntget(item->ek_mnt); - new->ek_dentry = dget(item->ek_dentry); + new->ek_path = item->ek_path; + path_get(&item->ek_path); } static struct cache_head *expkey_alloc(void) @@ -332,10 +329,9 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) static void svc_export_put(struct kref *ref) { struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - dput(exp->ex_dentry); - mntput(exp->ex_mnt); + path_put(&exp->ex_path); auth_domain_put(exp->ex_client); - kfree(exp->ex_path); + kfree(exp->ex_pathname); nfsd4_fslocs_free(&exp->ex_fslocs); kfree(exp); } @@ -349,7 +345,7 @@ static void svc_export_request(struct cache_detail *cd, char *pth; qword_add(bpp, blen, exp->ex_client->name); - pth = d_path(exp->ex_dentry, exp->ex_mnt, *bpp, *blen); + pth = d_path(&exp->ex_path, *bpp, *blen); if (IS_ERR(pth)) { /* is this correct? */ (*bpp)[0] = '\n'; @@ -507,8 +503,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_export exp, *expp; int an_int; - nd.dentry = NULL; - exp.ex_path = NULL; + nd.path.dentry = NULL; + exp.ex_pathname = NULL; /* fs locations */ exp.ex_fslocs.locations = NULL; @@ -547,11 +543,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.h.flags = 0; exp.ex_client = dom; - exp.ex_mnt = nd.mnt; - exp.ex_dentry = nd.dentry; - exp.ex_path = kstrdup(buf, GFP_KERNEL); + exp.ex_path.mnt = nd.path.mnt; + exp.ex_path.dentry = nd.path.dentry; + exp.ex_pathname = kstrdup(buf, GFP_KERNEL); err = -ENOMEM; - if (!exp.ex_path) + if (!exp.ex_pathname) goto out; /* expiry */ @@ -610,7 +606,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) goto out; } - err = check_export(nd.dentry->d_inode, exp.ex_flags, + err = check_export(nd.path.dentry->d_inode, exp.ex_flags, exp.ex_uuid); if (err) goto out; } @@ -628,9 +624,9 @@ static int svc_export_parse(struct 
cache_detail *cd, char *mesg, int mlen) out: nfsd4_fslocs_free(&exp.ex_fslocs); kfree(exp.ex_uuid); - kfree(exp.ex_path); - if (nd.dentry) - path_release(&nd); + kfree(exp.ex_pathname); + if (nd.path.dentry) + path_put(&nd.path); out_no_path: if (dom) auth_domain_put(dom); @@ -653,7 +649,7 @@ static int svc_export_show(struct seq_file *m, return 0; } exp = container_of(h, struct svc_export, h); - seq_path(m, exp->ex_mnt, exp->ex_dentry, " \t\n\\"); + seq_path(m, &exp->ex_path, " \t\n\\"); seq_putc(m, '\t'); seq_escape(m, exp->ex_client->name, " \t\n\\"); seq_putc(m, '('); @@ -680,8 +676,8 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b) struct svc_export *orig = container_of(a, struct svc_export, h); struct svc_export *new = container_of(b, struct svc_export, h); return orig->ex_client == new->ex_client && - orig->ex_dentry == new->ex_dentry && - orig->ex_mnt == new->ex_mnt; + orig->ex_path.dentry == new->ex_path.dentry && + orig->ex_path.mnt == new->ex_path.mnt; } static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) @@ -691,9 +687,9 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; - new->ex_dentry = dget(item->ex_dentry); - new->ex_mnt = mntget(item->ex_mnt); - new->ex_path = NULL; + new->ex_path.dentry = dget(item->ex_path.dentry); + new->ex_path.mnt = mntget(item->ex_path.mnt); + new->ex_pathname = NULL; new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; @@ -711,8 +707,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) new->ex_fsid = item->ex_fsid; new->ex_uuid = item->ex_uuid; item->ex_uuid = NULL; - new->ex_path = item->ex_path; - item->ex_path = NULL; + new->ex_pathname = item->ex_pathname; + item->ex_pathname = NULL; new->ex_fslocs.locations = item->ex_fslocs.locations; item->ex_fslocs.locations = NULL; 
new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; @@ -755,8 +751,8 @@ svc_export_lookup(struct svc_export *exp) struct cache_head *ch; int hash; hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); - hash ^= hash_ptr(exp->ex_dentry, EXPORT_HASHBITS); - hash ^= hash_ptr(exp->ex_mnt, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS); ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, hash); @@ -772,8 +768,8 @@ svc_export_update(struct svc_export *new, struct svc_export *old) struct cache_head *ch; int hash; hash = hash_ptr(old->ex_client, EXPORT_HASHBITS); - hash ^= hash_ptr(old->ex_dentry, EXPORT_HASHBITS); - hash ^= hash_ptr(old->ex_mnt, EXPORT_HASHBITS); + hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS); + hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS); ch = sunrpc_cache_update(&svc_export_cache, &new->h, &old->h, @@ -815,8 +811,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, key.ek_client = clp; key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_mnt = exp->ex_mnt; - key.ek_dentry = exp->ex_dentry; + key.ek_path = exp->ex_path; key.h.expiry_time = NEVER; key.h.flags = 0; @@ -865,13 +860,13 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, { struct svc_export *exp, key; int err; - + if (!clp) return ERR_PTR(-ENOENT); key.ex_client = clp; - key.ex_mnt = mnt; - key.ex_dentry = dentry; + key.ex_path.mnt = mnt; + key.ex_path.dentry = dentry; exp = svc_export_lookup(&key); if (exp == NULL) @@ -968,7 +963,7 @@ static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) static int exp_hash(struct auth_domain *clp, struct svc_export *exp) { u32 fsid[2]; - struct inode *inode = exp->ex_dentry->d_inode; + struct inode *inode = exp->ex_path.dentry->d_inode; dev_t dev = inode->i_sb->s_dev; if (old_valid_dev(dev)) { @@ -982,7 +977,7 @@ static int exp_hash(struct auth_domain *clp, 
struct svc_export *exp) static void exp_unhash(struct svc_export *exp) { struct svc_expkey *ek; - struct inode *inode = exp->ex_dentry->d_inode; + struct inode *inode = exp->ex_path.dentry->d_inode; ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (!IS_ERR(ek)) { @@ -1030,15 +1025,16 @@ exp_export(struct nfsctl_export *nxp) goto out_unlock; err = -EINVAL; - exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL); + exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); memset(&new, 0, sizeof(new)); /* must make sure there won't be an ex_fsid clash */ if ((nxp->ex_flags & NFSEXP_FSID) && (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && - fsid_key->ek_mnt && - (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) + fsid_key->ek_path.mnt && + (fsid_key->ek_path.mnt != nd.path.mnt || + fsid_key->ek_path.dentry != nd.path.dentry)) goto finish; if (!IS_ERR(exp)) { @@ -1054,7 +1050,7 @@ exp_export(struct nfsctl_export *nxp) goto finish; } - err = check_export(nd.dentry->d_inode, nxp->ex_flags, NULL); + err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL); if (err) goto finish; err = -ENOMEM; @@ -1063,12 +1059,11 @@ exp_export(struct nfsctl_export *nxp) new.h.expiry_time = NEVER; new.h.flags = 0; - new.ex_path = kstrdup(nxp->ex_path, GFP_KERNEL); - if (!new.ex_path) + new.ex_pathname = kstrdup(nxp->ex_path, GFP_KERNEL); + if (!new.ex_pathname) goto finish; new.ex_client = clp; - new.ex_mnt = nd.mnt; - new.ex_dentry = nd.dentry; + new.ex_path = nd.path; new.ex_flags = nxp->ex_flags; new.ex_anon_uid = nxp->ex_anon_uid; new.ex_anon_gid = nxp->ex_anon_gid; @@ -1089,15 +1084,14 @@ exp_export(struct nfsctl_export *nxp) } else err = 0; finish: - if (new.ex_path) - kfree(new.ex_path); + kfree(new.ex_pathname); if (exp) exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) cache_put(&fsid_key->h, &svc_expkey_cache); if (clp) auth_domain_put(clp); - path_release(&nd); + path_put(&nd.path); out_unlock: exp_writeunlock(); out: 
@@ -1148,8 +1142,8 @@ exp_unexport(struct nfsctl_export *nxp) goto out_domain; err = -EINVAL; - exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); - path_release(&nd); + exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL); + path_put(&nd.path); if (IS_ERR(exp)) goto out_domain; @@ -1185,12 +1179,12 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) printk("nfsd: exp_rootfh path not found %s", path); return err; } - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", - path, nd.dentry, clp->name, + path, nd.path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, nd.mnt, nd.dentry, NULL); + exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@ -1200,7 +1194,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) * fh must be initialized before calling fh_compose */ fh_init(&fh, maxsize); - if (fh_compose(&fh, exp, nd.dentry, NULL)) + if (fh_compose(&fh, exp, nd.path.dentry, NULL)) err = -EINVAL; else err = 0; @@ -1208,7 +1202,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) fh_put(&fh); exp_put(exp); out: - path_release(&nd); + path_put(&nd.path); return err; } @@ -1218,13 +1212,13 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, struct svc_export *exp; struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); if (IS_ERR(ek)) - return ERR_PTR(PTR_ERR(ek)); + return ERR_CAST(ek); - exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); + exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp); cache_put(&ek->h, &svc_expkey_cache); if (IS_ERR(exp)) - return ERR_PTR(PTR_ERR(exp)); + return ERR_CAST(exp); return exp; } @@ -1359,7 +1353,7 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); if (IS_ERR(exp)) return 
nfserrno(PTR_ERR(exp)); - rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); + rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); if (rv) goto out; rv = check_nfsd_access(exp, rqstp); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index eac82830bfd7..c721a1e6e9dd 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -67,7 +67,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, if (nfserr) RETURN_STATUS(nfserr); - err = vfs_getattr(resp->fh.fh_export->ex_mnt, + err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, resp->fh.fh_dentry, &resp->stat); nfserr = nfserrno(err); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index d7647f70e02b..17d0dd997204 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -218,7 +218,7 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) int err; struct kstat stat; - err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat); + err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); if (!err) { *p++ = xdr_one; /* attributes follow */ lease_get_mtime(dentry->d_inode, &stat.mtime); @@ -270,7 +270,7 @@ void fill_post_wcc(struct svc_fh *fhp) if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); - err = vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, + err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &fhp->fh_post_attr); if (err) fhp->fh_post_saved = 0; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1602cd00dd45..1ff90625860f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -120,9 +120,9 @@ out_no_tfm: static void nfsd4_sync_rec_dir(void) { - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); - nfsd_sync_dir(rec_dir.dentry); - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); + mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); + nfsd_sync_dir(rec_dir.path.dentry); + mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); } int @@ -142,9 +142,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) 
nfs4_save_user(&uid, &gid); /* lock the parent */ - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); + mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); - dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); + dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; @@ -154,11 +154,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } - status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); + status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); out_put: dput(dentry); out_unlock: - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); + mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); if (status == 0) { clp->cl_firststate = 1; nfsd4_sync_rec_dir(); @@ -221,7 +221,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) nfs4_save_user(&uid, &gid); - filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); + filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY); status = PTR_ERR(filp); if (IS_ERR(filp)) goto out; @@ -286,9 +286,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. 
name %.*s\n", namlen, name); - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); - dentry = lookup_one_len(name, rec_dir.dentry, namlen); - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); + mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); + dentry = lookup_one_len(name, rec_dir.path.dentry, namlen); + mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); return status; @@ -297,7 +297,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen) if (!dentry->d_inode) goto out; - status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); + status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry); out: dput(dentry); return status; @@ -347,12 +347,12 @@ nfsd4_recdir_purge_old(void) { if (!rec_dir_init) return; - status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); + status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); if (status == 0) nfsd4_sync_rec_dir(); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", rec_dir.dentry->d_name.name); + " directory %s\n", rec_dir.path.dentry->d_name.name); return; } @@ -373,10 +373,10 @@ int nfsd4_recdir_load(void) { int status; - status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); + status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", rec_dir.dentry->d_name.name); + " directory %s\n", rec_dir.path.dentry->d_name.name); return status; } @@ -415,5 +415,5 @@ nfsd4_shutdown_recdir(void) if (!rec_dir_init) return; rec_dir_init = 0; - path_release(&rec_dir); + path_put(&rec_dir.path); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f6744bc03dae..bcb97d8e8b8b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3261,11 +3261,11 @@ nfs4_reset_recoverydir(char *recdir) if (status) return status; status = -ENOTDIR; - if (S_ISDIR(nd.dentry->d_inode->i_mode)) { + if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) { nfs4_set_recdir(recdir); 
status = 0; } - path_release(&nd); + path_put(&nd.path); return status; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b0592e7c378d..0e6a179eccaf 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1330,9 +1330,9 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * *stat = exp_pseudoroot(rqstp, &tmp_fh); if (*stat) return NULL; - rootpath = tmp_fh.fh_export->ex_path; + rootpath = tmp_fh.fh_export->ex_pathname; - path = exp->ex_path; + path = exp->ex_pathname; if (strncmp(path, rootpath, strlen(rootpath))) { dprintk("nfsd: fs_locations failed;" @@ -1481,7 +1481,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out; } - err = vfs_getattr(exp->ex_mnt, dentry, &stat); + err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); if (err) goto out_nfserr; if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | @@ -1838,9 +1838,9 @@ out_acl: * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && - exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) { - err = vfs_getattr(exp->ex_mnt->mnt_parent, - exp->ex_mnt->mnt_mountpoint, &stat); + exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) { + err = vfs_getattr(exp->ex_path.mnt->mnt_parent, + exp->ex_path.mnt->mnt_mountpoint, &stat); if (err) goto out_nfserr; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8fbd2dc08a92..0130b345234d 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) return 1; tdentry = dget(dentry); - while (tdentry != exp->ex_dentry && ! 
IS_ROOT(tdentry)) { + while (tdentry != exp->ex_path.dentry && !IS_ROOT(tdentry)) { /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); @@ -59,9 +59,9 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) dput(tdentry); tdentry = parent; } - if (tdentry != exp->ex_dentry) + if (tdentry != exp->ex_path.dentry) dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); - rv = (tdentry == exp->ex_dentry); + rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; } @@ -209,9 +209,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) fileid_type = fh->fh_fileid_type; if (fileid_type == FILEID_ROOT) - dentry = dget(exp->ex_dentry); + dentry = dget(exp->ex_path.dentry); else { - dentry = exportfs_decode_fh(exp->ex_mnt, fid, + dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, data_left, fileid_type, nfsd_acceptable, exp); } @@ -299,7 +299,7 @@ out: static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry) { - if (dentry != exp->ex_dentry) { + if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; @@ -344,12 +344,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct inode * inode = dentry->d_inode; struct dentry *parent = dentry->d_parent; __u32 *datap; - dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev; - int root_export = (exp->ex_dentry == exp->ex_dentry->d_sb->s_root); + dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev; + int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root); dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), - (long) exp->ex_dentry->d_inode->i_ino, + (long) exp->ex_path.dentry->d_inode->i_ino, parent->d_name.name, dentry->d_name.name, (inode ? 
inode->i_ino : 0)); @@ -391,7 +391,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* FALL THROUGH */ case FSID_MAJOR_MINOR: case FSID_ENCODE_DEV: - if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags + if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV)) goto retry; break; @@ -454,7 +454,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; fhp->fh_handle.ofh_xino = - ino_t_to_u32(exp->ex_dentry->d_inode->i_ino); + ino_t_to_u32(exp->ex_path.dentry->d_inode->i_ino); fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); @@ -465,7 +465,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, datap = fhp->fh_handle.fh_auth+0; fhp->fh_handle.fh_fsid_type = fsid_type; mk_fsid(fsid_type, datap, ex_dev, - exp->ex_dentry->d_inode->i_ino, + exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); len = key_len(fsid_type); @@ -571,7 +571,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp) case FSID_DEV: case FSID_ENCODE_DEV: case FSID_MAJOR_MINOR: - if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags + if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) return FSIDSOURCE_DEV; break; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 977a71f64e19..6cfc96a12483 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -41,7 +41,7 @@ static __be32 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt, + return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, resp->fh.fh_dentry, &resp->stat)); } @@ -49,7 +49,7 @@ static __be32 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) { if (err) return err; - return 
nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt, + return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, resp->fh.fh_dentry, &resp->stat)); } @@ -164,7 +164,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, &resp->count); if (nfserr) return nfserr; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt, + return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, resp->fh.fh_dentry, &resp->stat)); } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 61ad61743d94..afd08e2c90a5 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -207,7 +207,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct kstat stat; - vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat); + vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat); return encode_fattr(rqstp, p, fhp, &stat); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cc75e4fcd02b..46f59d5365a0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -101,7 +101,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, { struct svc_export *exp = *expp, *exp2 = NULL; struct dentry *dentry = *dpp; - struct vfsmount *mnt = mntget(exp->ex_mnt); + struct vfsmount *mnt = mntget(exp->ex_path.mnt); struct dentry *mounts = dget(dentry); int err = 0; @@ -156,15 +156,15 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, if (isdotent(name, len)) { if (len==1) dentry = dget(dparent); - else if (dparent != exp->ex_dentry) { + else if (dparent != exp->ex_path.dentry) dentry = dget_parent(dparent); - } else if (!EX_NOHIDE(exp)) + else if (!EX_NOHIDE(exp)) dentry = dget(dparent); /* .. == . 
just like at / */ else { /* checking mountpoint crossing is very different when stepping up */ struct svc_export *exp2 = NULL; struct dentry *dp; - struct vfsmount *mnt = mntget(exp->ex_mnt); + struct vfsmount *mnt = mntget(exp->ex_path.mnt); dentry = dget(dparent); while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) ; @@ -721,7 +721,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, DQUOT_INIT(inode); } - *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags); + *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), + flags); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); out_nfserr: @@ -1462,7 +1463,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) if (!inode->i_op || !inode->i_op->readlink) goto out; - touch_atime(fhp->fh_export->ex_mnt, dentry); + touch_atime(fhp->fh_export->ex_path.mnt, dentry); /* N.B. Why does this call need a get_fs()?? * Remove the set_fs and watch the fireworks:-) --okir */ diff --git a/fs/ocfs2/cluster/endian.h b/fs/ocfs2/cluster/endian.h deleted file mode 100644 index 2df9082f4e35..000000000000 --- a/fs/ocfs2/cluster/endian.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef OCFS2_CLUSTER_ENDIAN_H -#define OCFS2_CLUSTER_ENDIAN_H - -static inline void be32_add_cpu(__be32 *var, u32 val) -{ - *var = cpu_to_be32(be32_to_cpu(*var) + val); -} - -#endif /* OCFS2_CLUSTER_ENDIAN_H */ diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index af2070da308b..709fba25bf7e 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -24,7 +24,6 @@ #include <linux/sysctl.h> #include <linux/configfs.h> -#include "endian.h" #include "tcp.h" #include "nodemanager.h" #include "heartbeat.h" diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index b2e832aca567..d25b9af28500 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -38,6 +38,15 @@ * locking semantics of the file system using the protocol. It should * be somewhere else, I'm sure, but right now it isn't. * + * With version 11, we separate out the filesystem locking portion. The + * filesystem now has a major.minor version it negotiates. Version 11 + * introduces this negotiation to the o2dlm protocol, and as such the + * version here in tcp_internal.h should not need to be bumped for + * filesystem locking changes. + * + * New in version 11 + * - Negotiation of filesystem locking in the dlm join. 
+ * * New in version 10: * - Meta/data locks combined * @@ -66,7 +75,7 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 10ULL +#define O2NET_PROTOCOL_VERSION 11ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index cfd5cb65cab0..b5786a787fab 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h @@ -193,7 +193,12 @@ enum dlm_status dlmunlock(struct dlm_ctxt *dlm, dlm_astunlockfunc_t *unlockast, void *data); -struct dlm_ctxt * dlm_register_domain(const char *domain, u32 key); +struct dlm_protocol_version { + u8 pv_major; + u8 pv_minor; +}; +struct dlm_ctxt * dlm_register_domain(const char *domain, u32 key, + struct dlm_protocol_version *fs_proto); void dlm_unregister_domain(struct dlm_ctxt *dlm); diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 2fd8bded38f3..644bee55d8ba 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -43,7 +43,6 @@ #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" #include "cluster/tcp.h" -#include "cluster/endian.h" #include "dlmapi.h" #include "dlmcommon.h" diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e90b92f9ece1..9843ee17ea27 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -142,6 +142,12 @@ struct dlm_ctxt spinlock_t work_lock; struct list_head dlm_domain_handlers; struct list_head dlm_eviction_callbacks; + + /* The filesystem specifies this at domain registration. We + * cache it here to know what to tell other nodes. 
*/ + struct dlm_protocol_version fs_locking_proto; + /* This is the inter-dlm communication version */ + struct dlm_protocol_version dlm_locking_proto; }; static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i) @@ -589,10 +595,24 @@ struct dlm_proxy_ast #define DLM_PROXY_AST_MAX_LEN (sizeof(struct dlm_proxy_ast)+DLM_LVB_LEN) #define DLM_MOD_KEY (0x666c6172) -enum dlm_query_join_response { +enum dlm_query_join_response_code { JOIN_DISALLOW = 0, JOIN_OK, JOIN_OK_NO_MAP, + JOIN_PROTOCOL_MISMATCH, +}; + +union dlm_query_join_response { + u32 intval; + struct { + u8 code; /* Response code. dlm_minor and fs_minor + are only valid if this is JOIN_OK */ + u8 dlm_minor; /* The minor version of the protocol the + dlm is speaking. */ + u8 fs_minor; /* The minor version of the protocol the + filesystem is speaking. */ + u8 reserved; + } packet; }; struct dlm_lock_request @@ -633,6 +653,8 @@ struct dlm_query_join_request u8 node_idx; u8 pad1[2]; u8 name_len; + struct dlm_protocol_version dlm_proto; + struct dlm_protocol_version fs_proto; u8 domain[O2NM_MAX_NAME_LEN]; u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)]; }; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6954565b8ccb..638d2ebb892b 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -123,6 +123,17 @@ DEFINE_SPINLOCK(dlm_domain_lock); LIST_HEAD(dlm_domains); static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); +/* + * The supported protocol version for DLM communication. Running domains + * will have a negotiated version with the same major number and a minor + * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should + * be used to determine what a running domain is actually using. 
+ */ +static const struct dlm_protocol_version dlm_protocol = { + .pv_major = 1, + .pv_minor = 0, +}; + #define DLM_DOMAIN_BACKOFF_MS 200 static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, @@ -133,6 +144,8 @@ static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); +static int dlm_protocol_compare(struct dlm_protocol_version *existing, + struct dlm_protocol_version *request); static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); @@ -668,11 +681,45 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) } EXPORT_SYMBOL_GPL(dlm_unregister_domain); +static int dlm_query_join_proto_check(char *proto_type, int node, + struct dlm_protocol_version *ours, + struct dlm_protocol_version *request) +{ + int rc; + struct dlm_protocol_version proto = *request; + + if (!dlm_protocol_compare(ours, &proto)) { + mlog(0, + "node %u wanted to join with %s locking protocol " + "%u.%u, we respond with %u.%u\n", + node, proto_type, + request->pv_major, + request->pv_minor, + proto.pv_major, proto.pv_minor); + request->pv_minor = proto.pv_minor; + rc = 0; + } else { + mlog(ML_NOTICE, + "Node %u wanted to join with %s locking " + "protocol %u.%u, but we have %u.%u, disallowing\n", + node, proto_type, + request->pv_major, + request->pv_minor, + ours->pv_major, + ours->pv_minor); + rc = 1; + } + + return rc; +} + static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data) { struct dlm_query_join_request *query; - enum dlm_query_join_response response; + union dlm_query_join_response response = { + .packet.code = JOIN_DISALLOW, + }; struct dlm_ctxt *dlm = NULL; u8 nodenum; @@ -690,11 +737,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, mlog(0, "node %u is not in our live map yet\n", query->node_idx); - response = JOIN_DISALLOW; + response.packet.code = 
JOIN_DISALLOW; goto respond; } - response = JOIN_OK_NO_MAP; + response.packet.code = JOIN_OK_NO_MAP; spin_lock(&dlm_domain_lock); dlm = __dlm_lookup_domain_full(query->domain, query->name_len); @@ -713,7 +760,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, mlog(0, "disallow join as node %u does not " "have node %u in its nodemap\n", query->node_idx, nodenum); - response = JOIN_DISALLOW; + response.packet.code = JOIN_DISALLOW; goto unlock_respond; } } @@ -733,30 +780,48 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, /*If this is a brand new context and we * haven't started our join process yet, then * the other node won the race. */ - response = JOIN_OK_NO_MAP; + response.packet.code = JOIN_OK_NO_MAP; } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { /* Disallow parallel joins. */ - response = JOIN_DISALLOW; + response.packet.code = JOIN_DISALLOW; } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { mlog(0, "node %u trying to join, but recovery " "is ongoing.\n", bit); - response = JOIN_DISALLOW; + response.packet.code = JOIN_DISALLOW; } else if (test_bit(bit, dlm->recovery_map)) { mlog(0, "node %u trying to join, but it " "still needs recovery.\n", bit); - response = JOIN_DISALLOW; + response.packet.code = JOIN_DISALLOW; } else if (test_bit(bit, dlm->domain_map)) { mlog(0, "node %u trying to join, but it " "is still in the domain! needs recovery?\n", bit); - response = JOIN_DISALLOW; + response.packet.code = JOIN_DISALLOW; } else { /* Alright we're fully a part of this domain * so we keep some state as to who's joining * and indicate to him that needs to be fixed * up. */ - response = JOIN_OK; - __dlm_set_joining_node(dlm, query->node_idx); + + /* Make sure we speak compatible locking protocols. 
*/ + if (dlm_query_join_proto_check("DLM", bit, + &dlm->dlm_locking_proto, + &query->dlm_proto)) { + response.packet.code = + JOIN_PROTOCOL_MISMATCH; + } else if (dlm_query_join_proto_check("fs", bit, + &dlm->fs_locking_proto, + &query->fs_proto)) { + response.packet.code = + JOIN_PROTOCOL_MISMATCH; + } else { + response.packet.dlm_minor = + query->dlm_proto.pv_minor; + response.packet.fs_minor = + query->fs_proto.pv_minor; + response.packet.code = JOIN_OK; + __dlm_set_joining_node(dlm, query->node_idx); + } } spin_unlock(&dlm->spinlock); @@ -765,9 +830,9 @@ unlock_respond: spin_unlock(&dlm_domain_lock); respond: - mlog(0, "We respond with %u\n", response); + mlog(0, "We respond with %u\n", response.packet.code); - return response; + return response.intval; } static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, @@ -899,10 +964,11 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm, static int dlm_request_join(struct dlm_ctxt *dlm, int node, - enum dlm_query_join_response *response) + enum dlm_query_join_response_code *response) { - int status, retval; + int status; struct dlm_query_join_request join_msg; + union dlm_query_join_response join_resp; mlog(0, "querying node %d\n", node); @@ -910,12 +976,15 @@ static int dlm_request_join(struct dlm_ctxt *dlm, join_msg.node_idx = dlm->node_num; join_msg.name_len = strlen(dlm->name); memcpy(join_msg.domain, dlm->name, join_msg.name_len); + join_msg.dlm_proto = dlm->dlm_locking_proto; + join_msg.fs_proto = dlm->fs_locking_proto; /* copy live node map to join message */ byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, - sizeof(join_msg), node, &retval); + sizeof(join_msg), node, + &join_resp.intval); if (status < 0 && status != -ENOPROTOOPT) { mlog_errno(status); goto bail; @@ -928,14 +997,41 @@ static int dlm_request_join(struct dlm_ctxt *dlm, if (status == -ENOPROTOOPT) { status = 0; *response = 
JOIN_OK_NO_MAP; - } else if (retval == JOIN_DISALLOW || - retval == JOIN_OK || - retval == JOIN_OK_NO_MAP) { - *response = retval; + } else if (join_resp.packet.code == JOIN_DISALLOW || + join_resp.packet.code == JOIN_OK_NO_MAP) { + *response = join_resp.packet.code; + } else if (join_resp.packet.code == JOIN_PROTOCOL_MISMATCH) { + mlog(ML_NOTICE, + "This node requested DLM locking protocol %u.%u and " + "filesystem locking protocol %u.%u. At least one of " + "the protocol versions on node %d is not compatible, " + "disconnecting\n", + dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor, + dlm->fs_locking_proto.pv_major, + dlm->fs_locking_proto.pv_minor, + node); + status = -EPROTO; + *response = join_resp.packet.code; + } else if (join_resp.packet.code == JOIN_OK) { + *response = join_resp.packet.code; + /* Use the same locking protocol as the remote node */ + dlm->dlm_locking_proto.pv_minor = + join_resp.packet.dlm_minor; + dlm->fs_locking_proto.pv_minor = + join_resp.packet.fs_minor; + mlog(0, + "Node %d responds JOIN_OK with DLM locking protocol " + "%u.%u and fs locking protocol %u.%u\n", + node, + dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor, + dlm->fs_locking_proto.pv_major, + dlm->fs_locking_proto.pv_minor); } else { status = -EINVAL; - mlog(ML_ERROR, "invalid response %d from node %u\n", retval, - node); + mlog(ML_ERROR, "invalid response %d from node %u\n", + join_resp.packet.code, node); } mlog(0, "status %d, node %d response is %d\n", status, node, @@ -1008,7 +1104,7 @@ struct domain_join_ctxt { static int dlm_should_restart_join(struct dlm_ctxt *dlm, struct domain_join_ctxt *ctxt, - enum dlm_query_join_response response) + enum dlm_query_join_response_code response) { int ret; @@ -1034,7 +1130,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) { int status = 0, tmpstat, node; struct domain_join_ctxt *ctxt; - enum dlm_query_join_response response = JOIN_DISALLOW; + enum dlm_query_join_response_code response 
= JOIN_DISALLOW; mlog_entry("%p", dlm); @@ -1450,10 +1546,38 @@ leave: } /* - * dlm_register_domain: one-time setup per "domain" + * Compare a requested locking protocol version against the current one. + * + * If the major numbers are different, they are incompatible. + * If the current minor is greater than the request, they are incompatible. + * If the current minor is less than or equal to the request, they are + * compatible, and the requester should run at the current minor version. + */ +static int dlm_protocol_compare(struct dlm_protocol_version *existing, + struct dlm_protocol_version *request) +{ + if (existing->pv_major != request->pv_major) + return 1; + + if (existing->pv_minor > request->pv_minor) + return 1; + + if (existing->pv_minor < request->pv_minor) + request->pv_minor = existing->pv_minor; + + return 0; +} + +/* + * dlm_register_domain: one-time setup per "domain". + * + * The filesystem passes in the requested locking version via proto. + * If registration was successful, proto will contain the negotiated + * locking protocol. */ struct dlm_ctxt * dlm_register_domain(const char *domain, - u32 key) + u32 key, + struct dlm_protocol_version *fs_proto) { int ret; struct dlm_ctxt *dlm = NULL; @@ -1496,6 +1620,15 @@ retry: goto retry; } + if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { + mlog(ML_ERROR, + "Requested locking protocol version is not " + "compatible with already registered domain " + "\"%s\"\n", domain); + ret = -EPROTO; + goto leave; + } + __dlm_get(dlm); dlm->num_joins++; @@ -1526,6 +1659,13 @@ retry: list_add_tail(&dlm->list, &dlm_domains); spin_unlock(&dlm_domain_lock); + /* + * Pass the locking protocol version into the join. If the join + * succeeds, it will have the negotiated protocol set. 
+ */ + dlm->dlm_locking_proto = dlm_protocol; + dlm->fs_locking_proto = *fs_proto; + ret = dlm_join_domain(dlm); if (ret) { mlog_errno(ret); @@ -1533,6 +1673,9 @@ retry: goto leave; } + /* Tell the caller what locking protocol we negotiated */ + *fs_proto = dlm->fs_locking_proto; + ret = 0; leave: if (new_ctxt) diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 6639baab0798..61a000f8524c 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -60,6 +60,8 @@ #define MLOG_MASK_PREFIX ML_DLMFS #include "cluster/masklog.h" +#include "ocfs2_lockingver.h" + static const struct super_operations dlmfs_ops; static const struct file_operations dlmfs_file_operations; static const struct inode_operations dlmfs_dir_inode_operations; @@ -70,6 +72,16 @@ static struct kmem_cache *dlmfs_inode_cache; struct workqueue_struct *user_dlm_worker; /* + * This is the userdlmfs locking protocol version. + * + * See fs/ocfs2/dlmglue.c for more details on locking versions. + */ +static const struct dlm_protocol_version user_locking_protocol = { + .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, + .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, +}; + +/* * decodes a set of open flags into a valid lock level and a set of flags. 
* returns < 0 if we have invalid flags * flags which mean something to us: @@ -416,6 +428,7 @@ static int dlmfs_mkdir(struct inode * dir, struct qstr *domain = &dentry->d_name; struct dlmfs_inode_private *ip; struct dlm_ctxt *dlm; + struct dlm_protocol_version proto = user_locking_protocol; mlog(0, "mkdir %.*s\n", domain->len, domain->name); @@ -435,7 +448,7 @@ static int dlmfs_mkdir(struct inode * dir, ip = DLMFS_I(inode); - dlm = user_dlm_register_context(domain); + dlm = user_dlm_register_context(domain, &proto); if (IS_ERR(dlm)) { status = PTR_ERR(dlm); mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c index 7d2f578b267d..4cb1d3dae250 100644 --- a/fs/ocfs2/dlm/userdlm.c +++ b/fs/ocfs2/dlm/userdlm.c @@ -645,7 +645,8 @@ bail: return status; } -struct dlm_ctxt *user_dlm_register_context(struct qstr *name) +struct dlm_ctxt *user_dlm_register_context(struct qstr *name, + struct dlm_protocol_version *proto) { struct dlm_ctxt *dlm; u32 dlm_key; @@ -661,7 +662,7 @@ struct dlm_ctxt *user_dlm_register_context(struct qstr *name) snprintf(domain, name->len + 1, "%.*s", name->len, name->name); - dlm = dlm_register_domain(domain, dlm_key); + dlm = dlm_register_domain(domain, dlm_key, proto); if (IS_ERR(dlm)) mlog_errno(PTR_ERR(dlm)); diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h index c400e93bbf79..39ec27738499 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlm/userdlm.h @@ -83,7 +83,8 @@ void user_dlm_write_lvb(struct inode *inode, void user_dlm_read_lvb(struct inode *inode, char *val, unsigned int len); -struct dlm_ctxt *user_dlm_register_context(struct qstr *name); +struct dlm_ctxt *user_dlm_register_context(struct qstr *name, + struct dlm_protocol_version *proto); void user_dlm_unregister_context(struct dlm_ctxt *dlm); struct dlmfs_inode_private { diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3867244fb144..351130c9b734 100644 --- a/fs/ocfs2/dlmglue.c +++ 
b/fs/ocfs2/dlmglue.c @@ -43,6 +43,7 @@ #include <cluster/masklog.h> #include "ocfs2.h" +#include "ocfs2_lockingver.h" #include "alloc.h" #include "dcache.h" @@ -258,6 +259,31 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = { .flags = 0, }; +/* + * This is the filesystem locking protocol version. + * + * Whenever the filesystem does new things with locks (adds or removes a + * lock, orders them differently, does different things underneath a lock), + * the version must be changed. The protocol is negotiated when joining + * the dlm domain. A node may join the domain if its major version is + * identical to all other nodes and its minor version is greater than + * or equal to all other nodes. When its minor version is greater than + * the other nodes, it will run at the minor version specified by the + * other nodes. + * + * If a locking change is made that will not be compatible with older + * versions, the major number must be increased and the minor version set + * to zero. If a change merely adds a behavior that can be disabled when + * speaking to older versions, the minor version must be increased. If a + * change adds a fully backwards compatible change (eg, LVB changes that + * are just ignored by older versions), the version does not need to be + * updated. 
+ */ +const struct dlm_protocol_version ocfs2_locking_protocol = { + .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, + .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, +}; + static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) { return lockres->l_type == OCFS2_LOCK_TYPE_META || @@ -2506,7 +2532,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str)); /* for now, uuid == domain */ - dlm = dlm_register_domain(osb->uuid_str, dlm_key); + dlm = dlm_register_domain(osb->uuid_str, dlm_key, + &osb->osb_locking_proto); if (IS_ERR(dlm)) { status = PTR_ERR(dlm); mlog_errno(status); diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 5f17243ba501..1d5b0699d0a9 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -116,4 +116,5 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); +extern const struct dlm_protocol_version ocfs2_locking_protocol; #endif /* DLMGLUE_H */ diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h deleted file mode 100644 index 1942e09f6ee5..000000000000 --- a/fs/ocfs2/endian.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef OCFS2_ENDIAN_H -#define OCFS2_ENDIAN_H - -static inline void le16_add_cpu(__le16 *var, u16 val) -{ - *var = cpu_to_le16(le16_to_cpu(*var) + val); -} - -static inline void le32_add_cpu(__le32 *var, u32 val) -{ - *var = cpu_to_le32(le32_to_cpu(*var) + val); -} - -static inline void le64_add_cpu(__le64 *var, u64 val) -{ - *var = cpu_to_le64(le64_to_cpu(*var) + val); -} - -static inline void be32_add_cpu(__be32 *var, u32 val) -{ - *var = cpu_to_be32(be32_to_cpu(*var) + val); -} - -#endif /* OCFS2_ENDIAN_H */ diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d08480580470..6546cef212e3 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -43,7 +43,6 @@ #include "dlm/dlmapi.h" #include "ocfs2_fs.h" -#include "endian.h" #include "ocfs2_lockid.h" /* Most user visible OCFS2 inodes will have very few pieces of @@ -251,6 +250,7 @@ struct ocfs2_super struct ocfs2_lock_res osb_rename_lockres; struct dlm_eviction_cb osb_eviction_cb; struct ocfs2_dlm_debug *osb_dlm_debug; + struct dlm_protocol_version osb_locking_proto; struct dentry *osb_debug_root; diff --git a/fs/ocfs2/ocfs2_lockingver.h b/fs/ocfs2/ocfs2_lockingver.h new file mode 100644 index 000000000000..82d5eeac0fff --- /dev/null +++ b/fs/ocfs2/ocfs2_lockingver.h @@ -0,0 +1,30 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * ocfs2_lockingver.h + * + * Defines OCFS2 Locking version values. + * + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef OCFS2_LOCKINGVER_H +#define OCFS2_LOCKINGVER_H + +/* + * The protocol version for ocfs2 cluster locking. See dlmglue.c for + * more details. + */ +#define OCFS2_LOCKING_PROTOCOL_MAJOR 1 +#define OCFS2_LOCKING_PROTOCOL_MINOR 0 + +#endif /* OCFS2_LOCKINGVER_H */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01fe40ee5ea9..bec75aff3d9f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1355,6 +1355,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_fs_info = osb; sb->s_op = &ocfs2_sops; sb->s_export_op = &ocfs2_export_ops; + osb->osb_locking_proto = ocfs2_locking_protocol; sb->s_time_gran = 1; sb->s_flags |= MS_NOATIME; /* this is needed to support O_LARGEFILE */ diff --git a/fs/open.c b/fs/open.c index 4932b4d1da05..54198538b67e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -127,10 +127,10 @@ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) error = user_path_walk(path, &nd); if (!error) { struct statfs tmp; - error = vfs_statfs_native(nd.dentry, &tmp); + error = vfs_statfs_native(nd.path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_release(&nd); + path_put(&nd.path); } return error; } @@ -146,10 +146,10 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 error = user_path_walk(path, &nd); if (!error) { struct statfs64 tmp; - error = vfs_statfs64(nd.dentry, &tmp); + error = vfs_statfs64(nd.path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_release(&nd); + path_put(&nd.path); } return error; } @@ -233,7 +233,7 @@ static long do_sys_truncate(const char __user * path, loff_t length) error = user_path_walk(path, &nd); if (error) 
goto out; - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ error = -EISDIR; @@ -271,13 +271,13 @@ static long do_sys_truncate(const char __user * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.dentry, length, 0, NULL); + error = do_truncate(nd.path.dentry, length, 0, NULL); } put_write_and_out: put_write_access(inode); dput_and_out: - path_release(&nd); + path_put(&nd.path); out: return error; } @@ -455,14 +455,14 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) res = vfs_permission(&nd, mode); /* SuS v2 requires we report a read only fs too */ if(res || !(mode & S_IWOTH) || - special_file(nd.dentry->d_inode->i_mode)) + special_file(nd.path.dentry->d_inode->i_mode)) goto out_path_release; - if(IS_RDONLY(nd.dentry->d_inode)) + if(IS_RDONLY(nd.path.dentry->d_inode)) res = -EROFS; out_path_release: - path_release(&nd); + path_put(&nd.path); out: current->fsuid = old_fsuid; current->fsgid = old_fsgid; @@ -490,10 +490,10 @@ asmlinkage long sys_chdir(const char __user * filename) if (error) goto dput_and_out; - set_fs_pwd(current->fs, nd.mnt, nd.dentry); + set_fs_pwd(current->fs, &nd.path); dput_and_out: - path_release(&nd); + path_put(&nd.path); out: return error; } @@ -501,9 +501,7 @@ out: asmlinkage long sys_fchdir(unsigned int fd) { struct file *file; - struct dentry *dentry; struct inode *inode; - struct vfsmount *mnt; int error; error = -EBADF; @@ -511,9 +509,7 @@ asmlinkage long sys_fchdir(unsigned int fd) if (!file) goto out; - dentry = file->f_path.dentry; - mnt = file->f_path.mnt; - inode = dentry->d_inode; + inode = file->f_path.dentry->d_inode; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) @@ -521,7 +517,7 @@ asmlinkage long sys_fchdir(unsigned int fd) error = file_permission(file, MAY_EXEC); if (!error) - set_fs_pwd(current->fs, mnt, dentry); + 
set_fs_pwd(current->fs, &file->f_path); out_putf: fput(file); out: @@ -545,11 +541,11 @@ asmlinkage long sys_chroot(const char __user * filename) if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; - set_fs_root(current->fs, nd.mnt, nd.dentry); + set_fs_root(current->fs, &nd.path); set_fs_altroot(); error = 0; dput_and_out: - path_release(&nd); + path_put(&nd.path); out: return error; } @@ -602,7 +598,7 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (error) goto out; - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; error = -EROFS; if (IS_RDONLY(inode)) @@ -617,11 +613,11 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(nd.dentry, &newattrs); + error = notify_change(nd.path.dentry, &newattrs); mutex_unlock(&inode->i_mutex); dput_and_out: - path_release(&nd); + path_put(&nd.path); out: return error; } @@ -675,8 +671,8 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) error = user_path_walk(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); - path_release(&nd); + error = chown_common(nd.path.dentry, user, group); + path_put(&nd.path); out: return error; } @@ -695,8 +691,8 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, error = __user_walk_fd(dfd, filename, follow, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); - path_release(&nd); + error = chown_common(nd.path.dentry, user, group); + path_put(&nd.path); out: return error; } @@ -709,8 +705,8 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group error = user_path_walk_link(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); - path_release(&nd); + error = 
chown_common(nd.path.dentry, user, group); + path_put(&nd.path); out: return error; } @@ -863,7 +859,7 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry goto out; if (IS_ERR(dentry)) goto out_err; - nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), + nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), nd->intent.open.flags - 1, nd->intent.open.file, open); @@ -891,9 +887,10 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags) filp = nd->intent.open.file; /* Has the filesystem initialised the file for us? */ if (filp->f_path.dentry == NULL) - filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); + filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, + NULL); else - path_release(nd); + path_put(&nd->path); return filp; } @@ -991,7 +988,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) files->next_fd = fd; } -void fastcall put_unused_fd(unsigned int fd) +void put_unused_fd(unsigned int fd) { struct files_struct *files = current->files; spin_lock(&files->file_lock); @@ -1014,7 +1011,7 @@ EXPORT_SYMBOL(put_unused_fd); * will follow. 
*/ -void fastcall fd_install(unsigned int fd, struct file * file) +void fd_install(unsigned int fd, struct file *file) { struct files_struct *files = current->files; struct fdtable *fdt; @@ -1061,7 +1058,6 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode) prevent_tail_call(ret); return ret; } -EXPORT_UNUSED_SYMBOL_GPL(sys_open); /* To be deleted for 2.6.25 */ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, int mode) diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 6b7ff1618945..d17b4fd204e1 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -38,6 +38,8 @@ struct op_inode_info { union op_inode_data u; }; +static struct inode *openprom_iget(struct super_block *sb, ino_t ino); + static inline struct op_inode_info *OP_I(struct inode *inode) { return container_of(inode, struct op_inode_info, vfs_inode); @@ -226,10 +228,10 @@ static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry return ERR_PTR(-ENOENT); found: - inode = iget(dir->i_sb, ino); + inode = openprom_iget(dir->i_sb, ino); mutex_unlock(&op_mutex); - if (!inode) - return ERR_PTR(-EINVAL); + if (IS_ERR(inode)) + return ERR_CAST(inode); ent_oi = OP_I(inode); ent_oi->type = ent_type; ent_oi->u = ent_data; @@ -348,14 +350,23 @@ static void openprom_destroy_inode(struct inode *inode) kmem_cache_free(op_inode_cachep, OP_I(inode)); } -static void openprom_read_inode(struct inode * inode) +static struct inode *openprom_iget(struct super_block *sb, ino_t ino) { - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - if (inode->i_ino == OPENPROM_ROOT_INO) { - inode->i_op = &openprom_inode_operations; - inode->i_fop = &openprom_operations; - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + if (inode->i_ino 
== OPENPROM_ROOT_INO) { + inode->i_op = &openprom_inode_operations; + inode->i_fop = &openprom_operations; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + } + unlock_new_inode(inode); } + return inode; } static int openprom_remount(struct super_block *sb, int *flags, char *data) @@ -367,7 +378,6 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations openprom_sops = { .alloc_inode = openprom_alloc_inode, .destroy_inode = openprom_destroy_inode, - .read_inode = openprom_read_inode, .statfs = simple_statfs, .remount_fs = openprom_remount, }; @@ -376,6 +386,7 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) { struct inode *root_inode; struct op_inode_info *oi; + int ret; s->s_flags |= MS_NOATIME; s->s_blocksize = 1024; @@ -383,9 +394,11 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) s->s_magic = OPENPROM_SUPER_MAGIC; s->s_op = &openprom_sops; s->s_time_gran = 1; - root_inode = iget(s, OPENPROM_ROOT_INO); - if (!root_inode) + root_inode = openprom_iget(s, OPENPROM_ROOT_INO); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); goto out_no_root; + } oi = OP_I(root_inode); oi->type = op_inode_node; @@ -393,13 +406,15 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) s->s_root = d_alloc_root(root_inode); if (!s->s_root) - goto out_no_root; + goto out_no_root_dentry; return 0; +out_no_root_dentry: + iput(root_inode); + ret = -ENOMEM; out_no_root: printk("openprom_fill_super: get root inode failed\n"); - iput(root_inode); - return -ENOMEM; + return ret; } static int openprom_get_sb(struct file_system_type *fs_type, diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index a99acd8de353..cb5f0a3f1b03 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -198,7 +198,7 @@ config LDM_DEBUG config SGI_PARTITION bool "SGI partition support" if PARTITION_ADVANCED - default y if (SGI_IP22 || SGI_IP27 || 
((MACH_JAZZ || SNI_RM) && !CPU_LITTLE_ENDIAN)) + default y if DEFAULT_SGI_PARTITION help Say Y here if you would like to be able to read the hard disk partition table format used by SGI machines. diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 739da701ae7b..03f808c5b79d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/kmod.h> #include <linux/ctype.h> +#include <linux/genhd.h> #include "check.h" @@ -215,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%8u %8llu %8u %8llu\n", - p->ios[0], (unsigned long long)p->sectors[0], - p->ios[1], (unsigned long long)p->sectors[1]); + preempt_disable(); + part_round_stats(p); + preempt_enable(); + return sprintf(buf, + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8u %8u %8u" + "\n", + part_stat_read(p, ios[READ]), + part_stat_read(p, merges[READ]), + (unsigned long long)part_stat_read(p, sectors[READ]), + jiffies_to_msecs(part_stat_read(p, ticks[READ])), + part_stat_read(p, ios[WRITE]), + part_stat_read(p, merges[WRITE]), + (unsigned long long)part_stat_read(p, sectors[WRITE]), + jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), + p->in_flight, + jiffies_to_msecs(part_stat_read(p, io_ticks)), + jiffies_to_msecs(part_stat_read(p, time_in_queue))); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -273,6 +290,7 @@ static struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + free_part_stats(p); kfree(p); } @@ -312,13 +330,20 @@ void delete_partition(struct gendisk *disk, int part) disk->part[part-1] = NULL; p->start_sect = 0; p->nr_sects = 0; - p->ios[0] = p->ios[1] = 0; - p->sectors[0] = p->sectors[1] = 0; + part_stat_set_all(p, 0); kobject_put(p->holder_dir); device_del(&p->dev); put_device(&p->dev); } +static ssize_t whole_disk_show(struct device *dev, + struct device_attribute *attr, char 
*buf) +{ + return 0; +} +static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, + whole_disk_show, NULL); + void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) { struct hd_struct *p; @@ -328,6 +353,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, if (!p) return; + if (!init_part_stats(p)) { + kfree(p); + return; + } p->start_sect = start; p->nr_sects = len; p->partno = part; @@ -352,13 +381,8 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, device_add(&p->dev); partition_sysfs_add_subdir(p); p->dev.uevent_suppress = 0; - if (flags & ADDPART_FLAG_WHOLEDISK) { - static struct attribute addpartattr = { - .name = "whole_disk", - .mode = S_IRUSR | S_IRGRP | S_IROTH, - }; - err = sysfs_create_file(&p->dev.kobj, &addpartattr); - } + if (flags & ADDPART_FLAG_WHOLEDISK) + err = device_create_file(&p->dev, &dev_attr_whole_disk); /* suppress uevent if the disk supresses it */ if (!disk->dev.uevent_suppress) diff --git a/fs/pipe.c b/fs/pipe.c index e66ec48e95d8..3c185b6527bc 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -171,7 +171,7 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, * * Description: * This function returns a kernel virtual address mapping for the - * passed in @pipe_buffer. If @atomic is set, an atomic map is provided + * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided * and the caller has to be careful not to fault before calling * the unmap function. 
* @@ -208,15 +208,15 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, } /** - * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer + * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal * * Description: - * This function attempts to steal the @struct page attached to + * This function attempts to steal the &struct page attached to * @buf. If successful, this function returns 0 and returns with * the page locked. The caller may then reuse the page for whatever - * he wishes, the typical use is insertion into a different file + * he wishes; the typical use is insertion into a different file * page cache. */ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -238,7 +238,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, } /** - * generic_pipe_buf_get - get a reference to a @struct pipe_buffer + * generic_pipe_buf_get - get a reference to a &struct pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to * @@ -576,9 +576,7 @@ bad_pipe_w(struct file *filp, const char __user *buf, size_t count, return -EBADF; } -static int -pipe_ioctl(struct inode *pino, struct file *filp, - unsigned int cmd, unsigned long arg) +static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_path.dentry->d_inode; struct pipe_inode_info *pipe; @@ -785,7 +783,7 @@ const struct file_operations read_fifo_fops = { .aio_read = pipe_read, .write = bad_pipe_w, .poll = pipe_poll, - .ioctl = pipe_ioctl, + .unlocked_ioctl = pipe_ioctl, .open = pipe_read_open, .release = pipe_read_release, .fasync = pipe_read_fasync, @@ -797,7 +795,7 @@ const struct file_operations write_fifo_fops = { .write = do_sync_write, .aio_write = pipe_write, .poll = pipe_poll, - .ioctl = pipe_ioctl, + .unlocked_ioctl = pipe_ioctl, .open = pipe_write_open, .release = pipe_write_release, 
.fasync = pipe_write_fasync, @@ -810,7 +808,7 @@ const struct file_operations rdwr_fifo_fops = { .write = do_sync_write, .aio_write = pipe_write, .poll = pipe_poll, - .ioctl = pipe_ioctl, + .unlocked_ioctl = pipe_ioctl, .open = pipe_rdwr_open, .release = pipe_rdwr_release, .fasync = pipe_rdwr_fasync, @@ -822,7 +820,7 @@ static const struct file_operations read_pipe_fops = { .aio_read = pipe_read, .write = bad_pipe_w, .poll = pipe_poll, - .ioctl = pipe_ioctl, + .unlocked_ioctl = pipe_ioctl, .open = pipe_read_open, .release = pipe_read_release, .fasync = pipe_read_fasync, @@ -834,7 +832,7 @@ static const struct file_operations write_pipe_fops = { .write = do_sync_write, .aio_write = pipe_write, .poll = pipe_poll, - .ioctl = pipe_ioctl, + .unlocked_ioctl = pipe_ioctl, .open = pipe_write_open, .release = pipe_write_release, .fasync = pipe_write_fasync, @@ -847,7 +845,7 @@ static const struct file_operations rdwr_pipe_fops = { .write = do_sync_write, .aio_write = pipe_write, .poll = pipe_poll, - .ioctl = pipe_ioctl, + .unlocked_ioctl = pipe_ioctl, .open = pipe_rdwr_open, .release = pipe_rdwr_release, .fasync = pipe_rdwr_fasync, diff --git a/fs/pnode.c b/fs/pnode.c index 89940f243fc2..05ba692bc540 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -83,6 +83,8 @@ void change_mnt_propagation(struct vfsmount *mnt, int type) mnt->mnt_master = NULL; if (type == MS_UNBINDABLE) mnt->mnt_flags |= MNT_UNBINDABLE; + else + mnt->mnt_flags &= ~MNT_UNBINDABLE; } } diff --git a/fs/proc/array.c b/fs/proc/array.c index 6ba2746e4517..07d6c4853fe8 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -77,6 +77,7 @@ #include <linux/cpuset.h> #include <linux/rcupdate.h> #include <linux/delayacct.h> +#include <linux/seq_file.h> #include <linux/pid_namespace.h> #include <asm/pgtable.h> @@ -88,18 +89,21 @@ do { memcpy(buffer, string, strlen(string)); \ buffer += strlen(string); } while (0) -static inline char *task_name(struct task_struct *p, char *buf) +static inline void task_name(struct seq_file 
*m, struct task_struct *p) { int i; + char *buf, *end; char *name; char tcomm[sizeof(p->comm)]; get_task_comm(tcomm, p); - ADDBUF(buf, "Name:\t"); + seq_printf(m, "Name:\t"); + end = m->buf + m->size; + buf = m->buf + m->count; name = tcomm; i = sizeof(tcomm); - do { + while (i && (buf < end)) { unsigned char c = *name; name++; i--; @@ -107,20 +111,21 @@ static inline char *task_name(struct task_struct *p, char *buf) if (!c) break; if (c == '\\') { - buf[1] = c; - buf += 2; + buf++; + if (buf < end) + *buf++ = c; continue; } if (c == '\n') { - buf[0] = '\\'; - buf[1] = 'n'; - buf += 2; + *buf++ = '\\'; + if (buf < end) + *buf++ = 'n'; continue; } buf++; - } while (i); - *buf = '\n'; - return buf+1; + } + m->count = buf - m->buf; + seq_printf(m, "\n"); } /* @@ -151,21 +156,20 @@ static inline const char *get_task_state(struct task_struct *tsk) return *p; } -static inline char *task_state(struct task_struct *p, char *buffer) +static inline void task_state(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p) { struct group_info *group_info; int g; struct fdtable *fdt = NULL; - struct pid_namespace *ns; pid_t ppid, tpid; - ns = current->nsproxy->pid_ns; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; tpid = pid_alive(p) && p->ptrace ? task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; - buffer += sprintf(buffer, + seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" "Pid:\t%d\n" @@ -175,7 +179,7 @@ static inline char *task_state(struct task_struct *p, char *buffer) "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), task_tgid_nr_ns(p, ns), - task_pid_nr_ns(p, ns), + pid_nr_ns(pid, ns), ppid, tpid, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -183,7 +187,7 @@ static inline char *task_state(struct task_struct *p, char *buffer) task_lock(p); if (p->files) fdt = files_fdtable(p->files); - buffer += sprintf(buffer, + seq_printf(m, "FDSize:\t%d\n" "Groups:\t", fdt ? 
fdt->max_fds : 0); @@ -194,20 +198,18 @@ static inline char *task_state(struct task_struct *p, char *buffer) task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) - buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g)); + seq_printf(m, "%d ", GROUP_AT(group_info, g)); put_group_info(group_info); - buffer += sprintf(buffer, "\n"); - return buffer; + seq_printf(m, "\n"); } -static char *render_sigset_t(const char *header, sigset_t *set, char *buffer) +static void render_sigset_t(struct seq_file *m, const char *header, + sigset_t *set) { - int i, len; + int i; - len = strlen(header); - memcpy(buffer, header, len); - buffer += len; + seq_printf(m, "%s", header); i = _NSIG; do { @@ -218,12 +220,10 @@ static char *render_sigset_t(const char *header, sigset_t *set, char *buffer) if (sigismember(set, i+2)) x |= 2; if (sigismember(set, i+3)) x |= 4; if (sigismember(set, i+4)) x |= 8; - *buffer++ = (x < 10 ? '0' : 'a' - 10) + x; + seq_printf(m, "%x", x); } while (i >= 4); - *buffer++ = '\n'; - *buffer = 0; - return buffer; + seq_printf(m, "\n"); } static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, @@ -241,7 +241,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, } } -static inline char *task_sig(struct task_struct *p, char *buffer) +static inline void task_sig(struct seq_file *m, struct task_struct *p) { unsigned long flags; sigset_t pending, shpending, blocked, ignored, caught; @@ -268,67 +268,66 @@ static inline char *task_sig(struct task_struct *p, char *buffer) } rcu_read_unlock(); - buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); - buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); + seq_printf(m, "Threads:\t%d\n", num_threads); + seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); /* render them all */ - buffer = render_sigset_t("SigPnd:\t", &pending, buffer); - buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer); - buffer = render_sigset_t("SigBlk:\t", &blocked, 
buffer); - buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); - buffer = render_sigset_t("SigCgt:\t", &caught, buffer); - - return buffer; + render_sigset_t(m, "SigPnd:\t", &pending); + render_sigset_t(m, "ShdPnd:\t", &shpending); + render_sigset_t(m, "SigBlk:\t", &blocked); + render_sigset_t(m, "SigIgn:\t", &ignored); + render_sigset_t(m, "SigCgt:\t", &caught); } -static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer) +static void render_cap_t(struct seq_file *m, const char *header, + kernel_cap_t *a) { unsigned __capi; - buffer += sprintf(buffer, "%s", header); + seq_printf(m, "%s", header); CAP_FOR_EACH_U32(__capi) { - buffer += sprintf(buffer, "%08x", - a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + seq_printf(m, "%08x", + a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); } - return buffer + sprintf(buffer, "\n"); + seq_printf(m, "\n"); } -static inline char *task_cap(struct task_struct *p, char *buffer) +static inline void task_cap(struct seq_file *m, struct task_struct *p) { - buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer); - buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer); - return render_cap_t("CapEff:\t", &p->cap_effective, buffer); + render_cap_t(m, "CapInh:\t", &p->cap_inheritable); + render_cap_t(m, "CapPrm:\t", &p->cap_permitted); + render_cap_t(m, "CapEff:\t", &p->cap_effective); } -static inline char *task_context_switch_counts(struct task_struct *p, - char *buffer) +static inline void task_context_switch_counts(struct seq_file *m, + struct task_struct *p) { - return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n" - "nonvoluntary_ctxt_switches:\t%lu\n", - p->nvcsw, - p->nivcsw); + seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" + "nonvoluntary_ctxt_switches:\t%lu\n", + p->nvcsw, + p->nivcsw); } -int proc_pid_status(struct task_struct *task, char *buffer) +int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - char *orig = 
buffer; struct mm_struct *mm = get_task_mm(task); - buffer = task_name(task, buffer); - buffer = task_state(task, buffer); + task_name(m, task); + task_state(m, ns, pid, task); if (mm) { - buffer = task_mem(mm, buffer); + task_mem(m, mm); mmput(mm); } - buffer = task_sig(task, buffer); - buffer = task_cap(task, buffer); - buffer = cpuset_task_status_allowed(task, buffer); + task_sig(m, task); + task_cap(m, task); + cpuset_task_status_allowed(m, task); #if defined(CONFIG_S390) - buffer = task_show_regs(task, buffer); + task_show_regs(m, task); #endif - buffer = task_context_switch_counts(task, buffer); - return buffer - orig; + task_context_switch_counts(m, task); + return 0; } /* @@ -390,14 +389,14 @@ static cputime_t task_gtime(struct task_struct *p) return p->gtime; } -static int do_task_stat(struct task_struct *task, char *buffer, int whole) +static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task, int whole) { unsigned long vsize, eip, esp, wchan = ~0UL; long priority, nice; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; - int res; pid_t ppid = 0, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; @@ -409,9 +408,6 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; - struct pid_namespace *ns; - - ns = current->nsproxy->pid_ns; state = *get_task_state(task); vsize = eip = esp = 0; @@ -498,10 +494,10 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); - res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ + seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", - task_pid_nr_ns(task, ns), + pid_nr_ns(pid, ns), tcomm, state, ppid, @@ 
-550,20 +546,23 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) cputime_to_clock_t(cgtime)); if (mm) mmput(mm); - return res; + return 0; } -int proc_tid_stat(struct task_struct *task, char *buffer) +int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return do_task_stat(task, buffer, 0); + return do_task_stat(m, ns, pid, task, 0); } -int proc_tgid_stat(struct task_struct *task, char *buffer) +int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return do_task_stat(task, buffer, 1); + return do_task_stat(m, ns, pid, task, 1); } -int proc_pid_statm(struct task_struct *task, char *buffer) +int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; struct mm_struct *mm = get_task_mm(task); @@ -572,7 +571,8 @@ int proc_pid_statm(struct task_struct *task, char *buffer) size = task_statm(mm, &shared, &text, &data, &resident); mmput(mm); } + seq_printf(m, "%d %d %d %d %d %d %d\n", + size, resident, shared, text, lib, data, 0); - return sprintf(buffer, "%d %d %d %d %d %d %d\n", - size, resident, shared, text, lib, data, 0); + return 0; } diff --git a/fs/proc/base.c b/fs/proc/base.c index c59852b38787..88f8edf18258 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -121,6 +121,10 @@ struct pid_entry { NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ { .proc_read = &proc_##OTYPE } ) +#define ONE(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_single_file_operations, \ + { .proc_show = &proc_##OTYPE } ) int maps_protect; EXPORT_SYMBOL(maps_protect); @@ -149,7 +153,7 @@ static int get_nr_threads(struct task_struct *tsk) return count; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_cwd_link(struct inode 
*inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -161,8 +165,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); + *path = fs->pwd; + path_get(&fs->pwd); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -170,7 +174,7 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs return result; } -static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_root_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -182,8 +186,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); + *path = fs->root; + path_get(&fs->root); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -502,7 +506,7 @@ static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; -extern struct seq_operations mounts_op; +extern const struct seq_operations mounts_op; struct proc_mounts { struct seq_file m; int event; @@ -581,7 +585,7 @@ static const struct file_operations proc_mounts_operations = { .poll = mounts_poll, }; -extern struct seq_operations mountstats_op; +extern const struct seq_operations mountstats_op; static int mountstats_open(struct inode *inode, struct file *file) { int ret = seq_open(file, &mountstats_op); @@ -658,6 +662,45 @@ static const struct file_operations proc_info_file_operations = { .read = proc_info_read, }; +static int proc_single_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct pid_namespace *ns; + struct pid *pid; + struct task_struct *task; + int ret; + + ns = inode->i_sb->s_fs_info; + pid = proc_pid(inode); + task = get_pid_task(pid, 
PIDTYPE_PID); + if (!task) + return -ESRCH; + + ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); + + put_task_struct(task); + return ret; +} + +static int proc_single_open(struct inode *inode, struct file *filp) +{ + int ret; + ret = single_open(filp, proc_single_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_single_file_operations = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int mem_open(struct inode* inode, struct file* file) { file->private_data = (void*)((long)current->self_exec_id); @@ -1121,39 +1164,36 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) int error = -EACCES; /* We don't need a base pointer in the /proc filesystem */ - path_release(nd); + path_put(&nd->path); /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); nd->last_type = LAST_BIND; out: return ERR_PTR(error); } -static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, - char __user *buffer, int buflen) +static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - struct inode * inode; char *tmp = (char*)__get_free_page(GFP_TEMPORARY); - char *path; + char *pathname; int len; if (!tmp) return -ENOMEM; - inode = dentry->d_inode; - path = d_path(dentry, mnt, tmp, PAGE_SIZE); - len = PTR_ERR(path); - if (IS_ERR(path)) + pathname = d_path(path, tmp, PAGE_SIZE); + len = PTR_ERR(pathname); + if (IS_ERR(pathname)) goto out; - len = tmp + PAGE_SIZE - 1 - path; + len = tmp + PAGE_SIZE - 1 - pathname; if (len > buflen) len = buflen; - if (copy_to_user(buffer, path, len)) + if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: free_page((unsigned long)tmp); 
@@ -1164,20 +1204,18 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b { int error = -EACCES; struct inode *inode = dentry->d_inode; - struct dentry *de; - struct vfsmount *mnt = NULL; + struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &path); if (error) goto out; - error = do_proc_readlink(de, mnt, buffer, buflen); - dput(de); - mntput(mnt); + error = do_proc_readlink(&path, buffer, buflen); + path_put(&path); out: return error; } @@ -1404,8 +1442,7 @@ out: #define PROC_FDINFO_MAX 64 -static int proc_fd_info(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt, char *info) +static int proc_fd_info(struct inode *inode, struct path *path, char *info) { struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; @@ -1424,10 +1461,10 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry, spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - if (mnt) - *mnt = mntget(file->f_path.mnt); - if (dentry) - *dentry = dget(file->f_path.dentry); + if (path) { + *path = file->f_path; + path_get(&file->f_path); + } if (info) snprintf(info, PROC_FDINFO_MAX, "pos:\t%lli\n" @@ -1444,10 +1481,9 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry, return -ENOENT; } -static int proc_fd_link(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt) +static int proc_fd_link(struct inode *inode, struct path *path) { - return proc_fd_info(inode, dentry, mnt, NULL); + return proc_fd_info(inode, path, NULL); } static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -1641,7 +1677,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { char tmp[PROC_FDINFO_MAX]; - int err = 
proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp); + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); if (!err) err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); return err; @@ -2058,15 +2094,23 @@ static const struct file_operations proc_coredump_filter_operations = { static int proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", task_tgid_vnr(current)); + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", task_tgid_vnr(current)); + if (!tgid) + return ERR_PTR(-ENOENT); + sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2231,14 +2275,14 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), + ONE("status", S_IRUGO, pid_status), INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tgid_stat), - INF("statm", S_IRUGO, pid_statm), + ONE("stat", S_IRUGO, tgid_stat), + ONE("statm", S_IRUGO, pid_statm), REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, numa_maps), @@ -2562,14 +2606,14 @@ static const struct pid_entry tid_base_stuff[] = { DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), + ONE("status", S_IRUGO, pid_status), INF("limits", S_IRUSR, pid_limits), #ifdef 
CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tid_stat), - INF("statm", S_IRUGO, pid_statm), + ONE("stat", S_IRUGO, tid_stat), + ONE("statm", S_IRUGO, pid_statm), REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, numa_maps), diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 6a2fe5187b62..68971e66cd41 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -25,12 +25,6 @@ #include "internal.h" -static ssize_t proc_file_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos); -static ssize_t proc_file_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos); -static loff_t proc_file_lseek(struct file *, loff_t, int); - DEFINE_SPINLOCK(proc_subdir_lock); static int proc_match(int len, const char *name, struct proc_dir_entry *de) @@ -40,12 +34,6 @@ static int proc_match(int len, const char *name, struct proc_dir_entry *de) return !memcmp(name, de->name, len); } -static const struct file_operations proc_file_operations = { - .llseek = proc_file_lseek, - .read = proc_file_read, - .write = proc_file_write, -}; - /* buffer size is one page but our output routines use some slack for overruns */ #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) @@ -233,6 +221,12 @@ proc_file_lseek(struct file *file, loff_t offset, int orig) return retval; } +static const struct file_operations proc_file_operations = { + .llseek = proc_file_lseek, + .read = proc_file_read, + .write = proc_file_write, +}; + static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -406,12 +400,12 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); - spin_lock(&proc_subdir_lock); - break; + goto out_unlock; } } } spin_unlock(&proc_subdir_lock); +out_unlock: unlock_kernel(); if 
(inode) { @@ -527,6 +521,7 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { unsigned int i; + struct proc_dir_entry *tmp; i = get_inode_number(); if (i == 0) @@ -550,6 +545,15 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp } spin_lock(&proc_subdir_lock); + + for (tmp = dir->subdir; tmp; tmp = tmp->next) + if (strcmp(tmp->name, dp->name) == 0) { + printk(KERN_WARNING "proc_dir_entry '%s' already " + "registered\n", dp->name); + dump_stack(); + break; + } + dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; @@ -558,7 +562,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp return 0; } -static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, +static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, const char *name, mode_t mode, nlink_t nlink) @@ -601,7 +605,7 @@ struct proc_dir_entry *proc_symlink(const char *name, { struct proc_dir_entry *ent; - ent = proc_create(&parent,name, + ent = __proc_create(&parent, name, (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); if (ent) { @@ -626,7 +630,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, { struct proc_dir_entry *ent; - ent = proc_create(&parent, name, S_IFDIR | mode, 2); + ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { if (proc_register(parent, ent) < 0) { kfree(ent); @@ -660,7 +664,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, nlink = 1; } - ent = proc_create(&parent,name,mode,nlink); + ent = __proc_create(&parent, name, mode, nlink); if (ent) { if (proc_register(parent, ent) < 0) { kfree(ent); @@ -670,6 +674,38 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, return ent; } +struct proc_dir_entry *proc_create(const char *name, mode_t mode, + struct proc_dir_entry *parent, + const struct file_operations 
*proc_fops) +{ + struct proc_dir_entry *pde; + nlink_t nlink; + + if (S_ISDIR(mode)) { + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO | S_IXUGO; + nlink = 2; + } else { + if ((mode & S_IFMT) == 0) + mode |= S_IFREG; + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + nlink = 1; + } + + pde = __proc_create(&parent, name, mode, nlink); + if (!pde) + goto out; + pde->proc_fops = proc_fops; + if (proc_register(parent, pde) < 0) + goto out_free; + return pde; +out_free: + kfree(pde); +out: + return NULL; +} + void free_proc_entry(struct proc_dir_entry *de) { unsigned int ino = de->low_ino; @@ -679,7 +715,7 @@ void free_proc_entry(struct proc_dir_entry *de) release_inode_number(ino); - if (S_ISLNK(de->mode) && de->data) + if (S_ISLNK(de->mode)) kfree(de->data); kfree(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 1a551d92e1d8..82b3a1b5a70b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -73,11 +73,6 @@ static void proc_delete_inode(struct inode *inode) struct vfsmount *proc_mnt; -static void proc_read_inode(struct inode * inode) -{ - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -} - static struct kmem_cache * proc_inode_cachep; static struct inode *proc_alloc_inode(struct super_block *sb) @@ -128,7 +123,6 @@ static int proc_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, - .read_inode = proc_read_inode, .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, @@ -401,39 +395,41 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, if (de != NULL && !try_module_get(de->owner)) goto out_mod; - inode = iget(sb, ino); + inode = iget_locked(sb, ino); if (!inode) goto out_ino; - - PROC_I(inode)->fd = 0; - PROC_I(inode)->pde = de; - if (de) { - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if 
(de->size) - inode->i_size = de->size; - if (de->nlink) - inode->i_nlink = de->nlink; - if (de->proc_iops) - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + PROC_I(inode)->fd = 0; + PROC_I(inode)->pde = de; + if (de) { + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) { + if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else + if (!de->proc_fops->compat_ioctl) + inode->i_fop = + &proc_reg_file_ops_no_compat; + else #endif - inode->i_fop = &proc_reg_file_ops; + inode->i_fop = &proc_reg_file_ops; + } else { + inode->i_fop = de->proc_fops; + } } - else - inode->i_fop = de->proc_fops; } + unlock_new_inode(inode); } - return inode; out_ino: @@ -471,4 +467,3 @@ out_no_root: de_put(&proc_root); return -ENOMEM; } -MODULE_LICENSE("GPL"); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7d57e8069924..1c81c8f1aeed 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -46,12 +46,17 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); extern int maps_protect; -extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); -extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); -extern int proc_tid_stat(struct task_struct *, char *); -extern int proc_tgid_stat(struct task_struct *, char *); -extern int proc_pid_status(struct task_struct *, char *); -extern int proc_pid_statm(struct task_struct *, char *); +extern void create_seq_entry(char *name, mode_t mode, + const struct file_operations *f); +extern int proc_exe_link(struct inode *, struct path *); +extern 
int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); extern const struct file_operations proc_maps_operations; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 7dd26e18cbfd..e78c81fcf547 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -12,7 +12,6 @@ #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/user.h> -#include <linux/a.out.h> #include <linux/capability.h> #include <linux/elf.h> #include <linux/elfcore.h> diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 22f789de3909..941e95114b5a 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -67,7 +67,7 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) if (len < 1) len = 1; seq_printf(m, "%*c", len, ' '); - seq_path(m, file->f_path.mnt, file->f_path.dentry, ""); + seq_path(m, &file->f_path, ""); } seq_putc(m, '\n'); @@ -116,7 +116,7 @@ static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) return rb_next((struct rb_node *) v); } -static struct seq_operations proc_nommu_vma_list_seqop = { +static const struct seq_operations proc_nommu_vma_list_seqop = { .start = nommu_vma_list_start, .next = nommu_vma_list_next, .stop = nommu_vma_list_stop, diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 51288db37a0c..468805d40e2b 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/pagemap.h> +#include <linux/interrupt.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/smp.h> @@ 
-64,7 +65,6 @@ */ extern int get_hardware_list(char *); extern int get_stram_list(char *); -extern int get_filesystem_list(char *); extern int get_exec_domain_list(char *); extern int get_dma_list(char *); @@ -84,10 +84,15 @@ static int loadavg_read_proc(char *page, char **start, off_t off, { int a, b, c; int len; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + } while (read_seqretry(&xtime_lock, seq)); - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), @@ -217,7 +222,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #undef K } -extern struct seq_operations fragmentation_op; +extern const struct seq_operations fragmentation_op; static int fragmentation_open(struct inode *inode, struct file *file) { (void)inode; @@ -231,7 +236,7 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; -extern struct seq_operations pagetypeinfo_op; +extern const struct seq_operations pagetypeinfo_op; static int pagetypeinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &pagetypeinfo_op); @@ -244,7 +249,7 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -extern struct seq_operations zoneinfo_op; +extern const struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &zoneinfo_op); @@ -269,7 +274,7 @@ static int version_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -extern struct seq_operations cpuinfo_op; +extern const struct seq_operations cpuinfo_op; static int cpuinfo_open(struct inode *inode, struct file *file) { return seq_open(file, 
&cpuinfo_op); @@ -322,7 +327,7 @@ static void devinfo_stop(struct seq_file *f, void *v) /* Nothing to do */ } -static struct seq_operations devinfo_ops = { +static const struct seq_operations devinfo_ops = { .start = devinfo_start, .next = devinfo_next, .stop = devinfo_stop, @@ -341,7 +346,7 @@ static const struct file_operations proc_devinfo_operations = { .release = seq_release, }; -extern struct seq_operations vmstat_op; +extern const struct seq_operations vmstat_op; static int vmstat_open(struct inode *inode, struct file *file) { return seq_open(file, &vmstat_op); @@ -372,7 +377,7 @@ static int stram_read_proc(char *page, char **start, off_t off, #endif #ifdef CONFIG_BLOCK -extern struct seq_operations partitions_op; +extern const struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { return seq_open(file, &partitions_op); @@ -384,7 +389,7 @@ static const struct file_operations proc_partitions_operations = { .release = seq_release, }; -extern struct seq_operations diskstats_op; +extern const struct seq_operations diskstats_op; static int diskstats_open(struct inode *inode, struct file *file) { return seq_open(file, &diskstats_op); @@ -398,7 +403,7 @@ static const struct file_operations proc_diskstats_operations = { #endif #ifdef CONFIG_MODULES -extern struct seq_operations modules_op; +extern const struct seq_operations modules_op; static int modules_open(struct inode *inode, struct file *file) { return seq_open(file, &modules_op); @@ -425,7 +430,7 @@ static const struct file_operations proc_slabinfo_operations = { }; #ifdef CONFIG_DEBUG_SLAB_LEAK -extern struct seq_operations slabstats_op; +extern const struct seq_operations slabstats_op; static int slabstats_open(struct inode *inode, struct file *file) { unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); @@ -599,8 +604,7 @@ static void int_seq_stop(struct seq_file *f, void *v) } -extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */ 
-static struct seq_operations int_seq_ops = { +static const struct seq_operations int_seq_ops = { .start = int_seq_start, .next = int_seq_next, .stop = int_seq_stop, diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4823c9677fac..14e9b5aaf863 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -67,12 +67,7 @@ EXPORT_SYMBOL_GPL(seq_release_net); struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { - struct proc_dir_entry *res; - - res = create_proc_entry(name, mode, net->proc_net); - if (res) - res->proc_fops = fops; - return res; + return proc_create(name, mode, net->proc_net, fops); } EXPORT_SYMBOL_GPL(proc_net_fops_create); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 4e57fcf85982..614c34b6d1c2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -9,7 +9,7 @@ static struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; -static struct inode_operations proc_sys_inode_operations; +static const struct inode_operations proc_sys_inode_operations; static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) { @@ -407,7 +407,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * if (!nd || !depth) goto out; - dentry = nd->dentry; + dentry = nd->path.dentry; table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); /* If the entry does not exist deny permission */ @@ -446,7 +446,7 @@ static const struct file_operations proc_sys_file_operations = { .readdir = proc_sys_readdir, }; -static struct inode_operations proc_sys_inode_operations = { +static const struct inode_operations proc_sys_inode_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 22846225acfa..49816e00b51a 100644 --- a/fs/proc/proc_tty.c +++ 
b/fs/proc/proc_tty.c @@ -15,9 +15,6 @@ #include <linux/seq_file.h> #include <linux/bitops.h> -static int tty_ldiscs_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data); - /* * The /proc/tty directory inodes... */ @@ -120,7 +117,7 @@ static void t_stop(struct seq_file *m, void *v) mutex_unlock(&tty_mutex); } -static struct seq_operations tty_drivers_op = { +static const struct seq_operations tty_drivers_op = { .start = t_start, .next = t_next, .stop = t_stop, diff --git a/fs/proc/root.c b/fs/proc/root.c index 81f99e691f99..ef0fb57fc9ef 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -232,6 +232,7 @@ void pid_ns_release_proc(struct pid_namespace *ns) EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); +EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 38338ed98cc6..49958cffbd8d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -9,13 +9,14 @@ #include <linux/mempolicy.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/seq_file.h> #include <asm/elf.h> #include <asm/uaccess.h> #include <asm/tlbflush.h> #include "internal.h" -char *task_mem(struct mm_struct *mm, char *buffer) +void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long data, text, lib; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; @@ -37,7 +38,7 @@ char *task_mem(struct mm_struct *mm, char *buffer) data = mm->total_vm - mm->shared_vm - mm->stack_vm; text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; - buffer += sprintf(buffer, + seq_printf(m, "VmPeak:\t%8lu kB\n" "VmSize:\t%8lu kB\n" "VmLck:\t%8lu kB\n" @@ -56,7 +57,6 @@ char *task_mem(struct mm_struct *mm, char *buffer) data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); - 
return buffer; } unsigned long task_vsize(struct mm_struct *mm) @@ -75,7 +75,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return mm->total_vm; } -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +int proc_exe_link(struct inode *inode, struct path *path) { struct vm_area_struct * vma; int result = -ENOENT; @@ -98,8 +98,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * } if (vma) { - *mnt = mntget(vma->vm_file->f_path.mnt); - *dentry = dget(vma->vm_file->f_path.dentry); + *path = vma->vm_file->f_path; + path_get(&vma->vm_file->f_path); result = 0; } @@ -216,7 +216,7 @@ static void m_stop(struct seq_file *m, void *v) } static int do_maps_open(struct inode *inode, struct file *file, - struct seq_operations *ops) + const struct seq_operations *ops) { struct proc_maps_private *priv; int ret = -ENOMEM; @@ -271,7 +271,7 @@ static int show_map(struct seq_file *m, void *v) */ if (file) { pad_len_spaces(m, len); - seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n"); + seq_path(m, &file->f_path, "\n"); } else { const char *name = arch_vma_name(vma); if (!name) { @@ -299,7 +299,7 @@ static int show_map(struct seq_file *m, void *v) return 0; } -static struct seq_operations proc_pid_maps_op = { +static const struct seq_operations proc_pid_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, @@ -434,7 +434,7 @@ static int show_smap(struct seq_file *m, void *v) return ret; } -static struct seq_operations proc_pid_smaps_op = { +static const struct seq_operations proc_pid_smaps_op = { .start = m_start, .next = m_next, .stop = m_stop, @@ -734,7 +734,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v) return show_numa_map(m, v); } -static struct seq_operations proc_pid_numa_maps_op = { +static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 
1932c2ca3457..8011528518bd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -12,7 +12,7 @@ * each process that owns it. Non-shared memory is counted * accurately. */ -char *task_mem(struct mm_struct *mm, char *buffer) +void task_mem(struct seq_file *m, struct mm_struct *mm) { struct vm_list_struct *vml; unsigned long bytes = 0, sbytes = 0, slack = 0; @@ -58,14 +58,13 @@ char *task_mem(struct mm_struct *mm, char *buffer) bytes += kobjsize(current); /* includes kernel stack */ - buffer += sprintf(buffer, + seq_printf(m, "Mem:\t%8lu bytes\n" "Slack:\t%8lu bytes\n" "Shared:\t%8lu bytes\n", bytes, slack, sbytes); up_read(&mm->mmap_sem); - return buffer; } unsigned long task_vsize(struct mm_struct *mm) @@ -104,7 +103,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +int proc_exe_link(struct inode *inode, struct path *path) { struct vm_list_struct *vml; struct vm_area_struct *vma; @@ -127,8 +126,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * } if (vma) { - *mnt = mntget(vma->vm_file->f_path.mnt); - *dentry = dget(vma->vm_file->f_path.dentry); + *path = vma->vm_file->f_path; + path_get(&vma->vm_file->f_path); result = 0; } @@ -199,7 +198,7 @@ static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) return vml ? 
vml->next : NULL; } -static struct seq_operations proc_pid_maps_ops = { +static const struct seq_operations proc_pid_maps_ops = { .start = m_start, .next = m_next, .stop = m_stop, diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 523e1098ae88..9ac0f5e064e0 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/user.h> -#include <linux/a.out.h> #include <linux/elf.h> #include <linux/elfcore.h> #include <linux/highmem.h> diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 638bdb963213..b31ab78052b3 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -125,7 +125,6 @@ static int qnx4_write_inode(struct inode *inode, int unused) static void qnx4_put_super(struct super_block *sb); static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_destroy_inode(struct inode *inode); -static void qnx4_read_inode(struct inode *); static int qnx4_remount(struct super_block *sb, int *flags, char *data); static int qnx4_statfs(struct dentry *, struct kstatfs *); @@ -133,7 +132,6 @@ static const struct super_operations qnx4_sops = { .alloc_inode = qnx4_alloc_inode, .destroy_inode = qnx4_destroy_inode, - .read_inode = qnx4_read_inode, .put_super = qnx4_put_super, .statfs = qnx4_statfs, .remount_fs = qnx4_remount, @@ -357,6 +355,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) struct inode *root; const char *errmsg; struct qnx4_sb_info *qs; + int ret = -EINVAL; qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL); if (!qs) @@ -396,12 +395,14 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) } /* does root not have inode number QNX4_ROOT_INO ?? 
*/ - root = iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); - if (!root) { + root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); + if (IS_ERR(root)) { printk("qnx4: get inode failed\n"); + ret = PTR_ERR(root); goto out; } + ret = -ENOMEM; s->s_root = d_alloc_root(root); if (s->s_root == NULL) goto outi; @@ -417,7 +418,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) outnobh: kfree(qs); s->s_fs_info = NULL; - return -EINVAL; + return ret; } static void qnx4_put_super(struct super_block *sb) @@ -462,29 +463,38 @@ static const struct address_space_operations qnx4_aops = { .bmap = qnx4_bmap }; -static void qnx4_read_inode(struct inode *inode) +struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh; struct qnx4_inode_entry *raw_inode; - int block, ino; - struct super_block *sb = inode->i_sb; - struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); + int block; + struct qnx4_inode_entry *qnx4_inode; + struct inode *inode; - ino = inode->i_ino; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + qnx4_inode = qnx4_raw_inode(inode); inode->i_mode = 0; QNX4DEBUG(("Reading inode : [%d]\n", ino)); if (!ino) { - printk("qnx4: bad inode number on dev %s: %d is out of range\n", + printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is " + "out of range\n", sb->s_id, ino); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } block = ino / QNX4_INODES_PER_BLOCK; if (!(bh = sb_bread(sb, block))) { printk("qnx4: major problem: unable to read inode from dev " "%s\n", sb->s_id); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + (ino % QNX4_INODES_PER_BLOCK); @@ -515,9 +525,16 @@ static void qnx4_read_inode(struct inode *inode) inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; - } else - 
printk("qnx4: bad inode %d on dev %s\n",ino,sb->s_id); + } else { + printk(KERN_ERR "qnx4: bad inode %lu on dev %s\n", + ino, sb->s_id); + iget_failed(inode); + brelse(bh); + return ERR_PTR(-EIO); + } brelse(bh); + unlock_new_inode(inode); + return inode; } static struct kmem_cache *qnx4_inode_cachep; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 733cdf01d645..775eed3a4085 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -128,10 +128,12 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam } brelse(bh); - if ((foundinode = iget(dir->i_sb, ino)) == NULL) { + foundinode = qnx4_iget(dir->i_sb, ino); + if (IS_ERR(foundinode)) { unlock_kernel(); - QNX4DEBUG(("qnx4: lookup->iget -> NULL\n")); - return ERR_PTR(-EACCES); + QNX4DEBUG(("qnx4: lookup->iget -> error %ld\n", + PTR_ERR(foundinode))); + return ERR_CAST(foundinode); } out: unlock_kernel(); diff --git a/fs/quota.c b/fs/quota.c index 99b24b52bfc8..84f28dd72116 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -341,11 +341,11 @@ static inline struct super_block *quotactl_block(const char __user *special) char *tmp = getname(special); if (IS_ERR(tmp)) - return ERR_PTR(PTR_ERR(tmp)); + return ERR_CAST(tmp); bdev = lookup_bdev(tmp); putname(tmp); if (IS_ERR(bdev)) - return ERR_PTR(PTR_ERR(bdev)); + return ERR_CAST(bdev); sb = get_super(bdev); bdput(bdev); if (!sb) diff --git a/fs/read_write.c b/fs/read_write.c index 1c177f29e1b7..49a98718ecdf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -366,7 +366,6 @@ asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) return ret; } -EXPORT_UNUSED_SYMBOL_GPL(sys_read); /* to be deleted for 2.6.25 */ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 195309857e63..57917932212e 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1536,7 +1536,7 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, 
if (!inode) inode = ERR_PTR(-ESTALE); if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); result = d_alloc_anon(inode); if (!result) { iput(inode); diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 5e7388b32d02..740bb8c0c1ae 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -575,6 +575,8 @@ void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int l printk ("Block %llu contains unformatted data\n", (unsigned long long)bh->b_blocknr); + + va_end(args); } static char print_tb_buf[2048]; diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 001144621672..8f86c52b30d8 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -444,7 +444,7 @@ static int r_show(struct seq_file *m, void *v) return show(m, v); } -static struct seq_operations r_ops = { +static const struct seq_operations r_ops = { .start = r_start, .next = r_next, .stop = r_stop, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5cd85fe5df5d..6841452e0dea 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -617,6 +617,7 @@ static const struct super_operations reiserfs_sops = { .unlockfs = reiserfs_unlockfs, .statfs = reiserfs_statfs, .remount_fs = reiserfs_remount, + .show_options = generic_show_options, #ifdef CONFIG_QUOTA .quota_read = reiserfs_quota_read, .quota_write = reiserfs_quota_write, @@ -1138,6 +1139,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); + char *new_opts = kstrdup(arg, GFP_KERNEL); int err; #ifdef CONFIG_QUOTA int i; @@ -1153,7 +1155,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) REISERFS_SB(s)->s_qf_names[i] = NULL; } #endif - return -EINVAL; + err = -EINVAL; + goto out_err; } handle_attrs(s); @@ -1191,9 +1194,9 @@ static int reiserfs_remount(struct super_block *s, int 
*mount_flags, char *arg) } if (blocks) { - int rc = reiserfs_resize(s, blocks); - if (rc != 0) - return rc; + err = reiserfs_resize(s, blocks); + if (err != 0) + goto out_err; } if (*mount_flags & MS_RDONLY) { @@ -1201,16 +1204,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* remount read-only */ if (s->s_flags & MS_RDONLY) /* it is read-only already */ - return 0; + goto out_ok; /* try to remount file system with read-only permissions */ if (sb_umount_state(rs) == REISERFS_VALID_FS || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { - return 0; + goto out_ok; } err = journal_begin(&th, s, 10); if (err) - return err; + goto out_err; /* Mounting a rw partition read-only. */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1220,11 +1223,13 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* remount read-write */ if (!(s->s_flags & MS_RDONLY)) { reiserfs_xattr_init(s, *mount_flags); - return 0; /* We are read-write already */ + goto out_ok; /* We are read-write already */ } - if (reiserfs_is_journal_aborted(journal)) - return journal->j_errno; + if (reiserfs_is_journal_aborted(journal)) { + err = journal->j_errno; + goto out_err; + } handle_data_mode(s, mount_options); handle_barrier_mode(s, mount_options); @@ -1232,7 +1237,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ err = journal_begin(&th, s, 10); if (err) - return err; + goto out_err; /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1247,7 +1252,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) SB_JOURNAL(s)->j_must_wait = 1; err = journal_end(&th, s, 10); if (err) - return err; + goto out_err; s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { @@ -1255,7 +1260,14 @@ static int reiserfs_remount(struct super_block *s, 
int *mount_flags, char *arg) reiserfs_xattr_init(s, *mount_flags); } +out_ok: + kfree(s->s_options); + s->s_options = new_opts; return 0; + +out_err: + kfree(new_opts); + return err; } static int read_super_block(struct super_block *s, int offset) @@ -1559,6 +1571,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) struct reiserfs_sb_info *sbi; int errval = -EINVAL; + save_mount_options(s, data); + sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); if (!sbi) { errval = -ENOMEM; @@ -2012,29 +2026,29 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) { - path_release(&nd); + if (nd.path.mnt->mnt_sb != sb) { + path_put(&nd.path); return -EXDEV; } /* We must not pack tails for quota files on reiserfs for quota IO to work */ - if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) { + if (!REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask) { reiserfs_warning(sb, "reiserfs: Quota file must have tail packing disabled."); - path_release(&nd); + path_put(&nd.path); return -EINVAL; } /* Not journalling quota? No more tests needed... */ if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { - path_release(&nd); + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path); } /* Quotafile not of fs root? */ - if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. 
" "Journalled quota will not work."); - path_release(&nd); + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path); } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 1597f6b649e0..eba037b3338f 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -155,7 +155,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, xadir = open_xa_dir(inode, flags); if (IS_ERR(xadir)) { - return ERR_PTR(PTR_ERR(xadir)); + return ERR_CAST(xadir); } else if (xadir && !xadir->d_inode) { dput(xadir); return ERR_PTR(-ENODATA); @@ -164,7 +164,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, xafile = lookup_one_len(name, xadir, strlen(name)); if (IS_ERR(xafile)) { dput(xadir); - return ERR_PTR(PTR_ERR(xafile)); + return ERR_CAST(xafile); } if (xafile->d_inode) { /* file exists */ @@ -1084,7 +1084,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) } /* This is the implementation for the xattr plugin infrastructure */ -static struct list_head xattr_handlers = LIST_HEAD_INIT(xattr_handlers); +static LIST_HEAD(xattr_handlers); static DEFINE_RWLOCK(handler_lock); static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index a49cf5b9a195..00b6f0a518c8 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -84,6 +84,8 @@ struct romfs_inode_info { struct inode vfs_inode; }; +static struct inode *romfs_iget(struct super_block *, unsigned long); + /* instead of private superblock data */ static inline unsigned long romfs_maxsize(struct super_block *sb) { @@ -117,7 +119,7 @@ static int romfs_fill_super(struct super_block *s, void *data, int silent) struct buffer_head *bh; struct romfs_super_block *rsb; struct inode *root; - int sz; + int sz, ret = -EINVAL; /* I would parse the options here, but there are none.. 
:) */ @@ -157,10 +159,13 @@ static int romfs_fill_super(struct super_block *s, void *data, int silent) & ROMFH_MASK; s->s_op = &romfs_ops; - root = iget(s, sz); - if (!root) + root = romfs_iget(s, sz); + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto out; + } + ret = -ENOMEM; s->s_root = d_alloc_root(root); if (!s->s_root) goto outiput; @@ -173,7 +178,7 @@ outiput: out: brelse(bh); outnobh: - return -EINVAL; + return ret; } /* That's simple too. */ @@ -389,8 +394,11 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) offset = be32_to_cpu(ri.spec) & ROMFH_MASK; - if ((inode = iget(dir->i_sb, offset))) - goto outi; + inode = romfs_iget(dir->i_sb, offset); + if (IS_ERR(inode)) { + res = PTR_ERR(inode); + goto out; + } /* * it's a bit funky, _lookup needs to return an error code @@ -402,7 +410,7 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) */ out0: inode = NULL; -outi: res = 0; + res = 0; d_add (dentry, inode); out: unlock_kernel(); @@ -478,20 +486,29 @@ static mode_t romfs_modemap[] = S_IFBLK+0600, S_IFCHR+0600, S_IFSOCK+0644, S_IFIFO+0644 }; -static void -romfs_read_inode(struct inode *i) +static struct inode * +romfs_iget(struct super_block *sb, unsigned long ino) { - int nextfh, ino; + int nextfh; struct romfs_inode ri; + struct inode *i; + + ino &= ROMFH_MASK; + i = iget_locked(sb, ino); + if (!i) + return ERR_PTR(-ENOMEM); + if (!(i->i_state & I_NEW)) + return i; - ino = i->i_ino & ROMFH_MASK; i->i_mode = 0; /* Loop for finding the real hard link */ for(;;) { if (romfs_copyfrom(i, &ri, ino, ROMFH_SIZE) <= 0) { - printk("romfs: read error for inode 0x%x\n", ino); - return; + printk(KERN_ERR "romfs: read error for inode 0x%lx\n", + ino); + iget_failed(i); + return ERR_PTR(-EIO); } /* XXX: do romfs_checksum here too (with name) */ @@ -548,6 +565,8 @@ romfs_read_inode(struct inode *i) init_special_inode(i, ino, MKDEV(nextfh>>16,nextfh&0xffff)); } + 
unlock_new_inode(i); + return i; } static struct kmem_cache * romfs_inode_cachep; @@ -599,7 +618,6 @@ static int romfs_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations romfs_ops = { .alloc_inode = romfs_alloc_inode, .destroy_inode = romfs_destroy_inode, - .read_inode = romfs_read_inode, .statfs = romfs_statfs, .remount_fs = romfs_remount, }; diff --git a/fs/select.c b/fs/select.c index 47f47925aea2..5633fe980781 100644 --- a/fs/select.c +++ b/fs/select.c @@ -739,7 +739,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, timeout_jiffies = -1; else #endif - timeout_jiffies = msecs_to_jiffies(timeout_msecs); + timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1; } else { /* Infinite (< 0) or no (0) timeout */ timeout_jiffies = timeout_msecs; diff --git a/fs/seq_file.c b/fs/seq_file.c index ca71c115bdaa..853770274f20 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -342,13 +342,11 @@ int seq_printf(struct seq_file *m, const char *f, ...) 
} EXPORT_SYMBOL(seq_printf); -int seq_path(struct seq_file *m, - struct vfsmount *mnt, struct dentry *dentry, - char *esc) +int seq_path(struct seq_file *m, struct path *path, char *esc) { if (m->count < m->size) { char *s = m->buf + m->count; - char *p = d_path(dentry, mnt, s, m->size - m->count); + char *p = d_path(path, s, m->size - m->count); if (!IS_ERR(p)) { while (s <= p) { char c = *p++; diff --git a/fs/signalfd.c b/fs/signalfd.c index 2d3e107da2d3..cb2b63ae0bf4 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -27,6 +27,7 @@ #include <linux/list.h> #include <linux/anon_inodes.h> #include <linux/signalfd.h> +#include <linux/syscalls.h> struct signalfd_ctx { sigset_t sigmask; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 4e5c22ca802e..376ef3ee6ed7 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -505,7 +505,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) if (warn_count < 5) { warn_count++; printk(KERN_EMERG "smbfs is deprecated and will be removed" - "from the 2.6.27 kernel. Please migrate to cifs\n"); + " from the 2.6.27 kernel. 
Please migrate to cifs\n"); } if (!raw_data) diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index e48bd8235a8e..e37fe4deebd0 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -329,9 +329,8 @@ smb_receive(struct smb_sb_info *server, struct smb_request *req) msg.msg_control = NULL; /* Dont repeat bytes and count available bufferspace */ - rlen = smb_move_iov(&p, &num, iov, req->rq_bytes_recvd); - if (req->rq_rlen < rlen) - rlen = req->rq_rlen; + rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd), + (req->rq_rlen - req->rq_bytes_recvd)); result = kernel_recvmsg(sock, &msg, p, num, rlen, flags); diff --git a/fs/splice.c b/fs/splice.c index 4ee49e86edde..9b559ee711a8 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1179,6 +1179,9 @@ static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n) { int partial; + if (!access_ok(VERIFY_READ, src, n)) + return -EFAULT; + pagefault_disable(); partial = __copy_from_user_inatomic(dst, src, n); pagefault_enable(); @@ -1231,7 +1234,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, if (unlikely(!len)) break; error = -EFAULT; - if (unlikely(!base)) + if (!access_ok(VERIFY_READ, base, len)) break; /* @@ -1387,6 +1390,11 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, break; } + if (unlikely(!access_ok(VERIFY_WRITE, base, len))) { + error = -EFAULT; + break; + } + sd.len = 0; sd.total_len = len; sd.flags = flags; diff --git a/fs/stat.c b/fs/stat.c index 68510068a641..9cf41f719d50 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -62,8 +62,8 @@ int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); - path_release(&nd); + error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); + path_put(&nd.path); } return error; } @@ -82,8 +82,8 @@ int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) error = __user_walk_fd(dfd, name, 
0, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); - path_release(&nd); + error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); + path_put(&nd.path); } return error; } @@ -302,17 +302,18 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *path, error = __user_walk_fd(dfd, path, 0, &nd); if (!error) { - struct inode * inode = nd.dentry->d_inode; + struct inode *inode = nd.path.dentry->d_inode; error = -EINVAL; if (inode->i_op && inode->i_op->readlink) { - error = security_inode_readlink(nd.dentry); + error = security_inode_readlink(nd.path.dentry); if (!error) { - touch_atime(nd.mnt, nd.dentry); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); + touch_atime(nd.path.mnt, nd.path.dentry); + error = inode->i_op->readlink(nd.path.dentry, + buf, bufsiz); } } - path_release(&nd); + path_put(&nd.path); } return error; } diff --git a/fs/super.c b/fs/super.c index ceaf2e3d594c..88811f60c8de 100644 --- a/fs/super.c +++ b/fs/super.c @@ -105,6 +105,7 @@ static inline void destroy_super(struct super_block *s) { security_sb_free(s); kfree(s->s_subtype); + kfree(s->s_options); kfree(s); } @@ -603,6 +604,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) mark_files_ro(sb); else if (!fs_may_remount_ro(sb)) return -EBUSY; + DQUOT_OFF(sb); } if (sb->s_op->remount_fs) { diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 0871c3dadce1..477904915032 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -77,7 +77,12 @@ void sysfs_remove_group(struct kobject * kobj, if (grp->name) { sd = sysfs_get_dirent(dir_sd, grp->name); - BUG_ON(!sd); + if (!sd) { + printk(KERN_WARNING "sysfs group %p not found for " + "kobject '%s'\n", grp, kobject_name(kobj)); + WARN_ON(!sd); + return; + } } else sd = sysfs_get(dir_sd); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 81ec6c548c07..c5d60de0658f 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -169,20 +169,27 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) 
init_special_inode(inode, inode->i_mode, rdev); } -static void sysv_read_inode(struct inode *inode) +struct inode *sysv_iget(struct super_block *sb, unsigned int ino) { - struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); struct buffer_head * bh; struct sysv_inode * raw_inode; struct sysv_inode_info * si; - unsigned int block, ino = inode->i_ino; + struct inode *inode; + unsigned int block; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %d is out of range\n", - inode->i_sb->s_id, ino); - goto bad_inode; + sb->s_id, ino); + return ERR_PTR(-EIO); } + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + raw_inode = sysv_raw_inode(sb, ino, &bh); if (!raw_inode) { printk("Major problem: unable to read inode from dev %s\n", @@ -214,11 +221,12 @@ static void sysv_read_inode(struct inode *inode) old_decode_dev(fs32_to_cpu(sbi, si->i_data[0]))); else sysv_set_inode(inode, 0); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } static struct buffer_head * sysv_update_inode(struct inode * inode) @@ -328,7 +336,6 @@ static void init_once(struct kmem_cache *cachep, void *p) const struct super_operations sysv_sops = { .alloc_inode = sysv_alloc_inode, .destroy_inode = sysv_destroy_inode, - .read_inode = sysv_read_inode, .write_inode = sysv_write_inode, .delete_inode = sysv_delete_inode, .put_super = sysv_put_super, diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 6bd850b7641a..a1f1ef33e81c 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -53,9 +53,9 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st ino = sysv_inode_by_name(dentry); if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + inode = sysv_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } d_add(dentry, inode); 
return NULL; diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 6f9707a1b954..5a903da54551 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -332,8 +332,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size) sb->s_magic = SYSV_MAGIC_BASE + sbi->s_type; /* set up enough so that it can read an inode */ sb->s_op = &sysv_sops; - root_inode = iget(sb,SYSV_ROOT_INO); - if (!root_inode || is_bad_inode(root_inode)) { + root_inode = sysv_iget(sb, SYSV_ROOT_INO); + if (IS_ERR(root_inode)) { printk("SysV FS: get root inode failed\n"); return 0; } diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 64c03bdf06a5..42d51d1c05cd 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -141,6 +141,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); /* inode.c */ +extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, int); extern int sysv_sync_inode(struct inode *); extern int sysv_sync_file(struct file *, struct dentry *, int); diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index ab26176f6b91..f855dcbbdfb8 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -28,15 +28,16 @@ #include "udf_i.h" #include "udf_sb.h" -#define udf_clear_bit(nr,addr) ext2_clear_bit(nr,addr) -#define udf_set_bit(nr,addr) ext2_set_bit(nr,addr) +#define udf_clear_bit(nr, addr) ext2_clear_bit(nr, addr) +#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) #define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) #define udf_find_first_one_bit(addr, size) find_first_one_bit(addr, size) -#define udf_find_next_one_bit(addr, size, offset) find_next_one_bit(addr, size, offset) +#define udf_find_next_one_bit(addr, size, offset) \ + find_next_one_bit(addr, size, offset) #define leBPL_to_cpup(x) leNUM_to_cpup(BITS_PER_LONG, x) -#define leNUM_to_cpup(x,y) xleNUM_to_cpup(x,y) -#define xleNUM_to_cpup(x,y) (le ## x ## _to_cpup(y)) +#define leNUM_to_cpup(x, y) 
xleNUM_to_cpup(x, y) +#define xleNUM_to_cpup(x, y) (le ## x ## _to_cpup(y)) #define uintBPL_t uint(BITS_PER_LONG) #define uint(x) xuint(x) #define xuint(x) __le ## x @@ -62,7 +63,8 @@ static inline int find_next_one_bit(void *addr, int size, int offset) result += BITS_PER_LONG; } while (size & ~(BITS_PER_LONG - 1)) { - if ((tmp = leBPL_to_cpup(p++))) + tmp = leBPL_to_cpup(p++); + if (tmp) goto found_middle; result += BITS_PER_LONG; size -= BITS_PER_LONG; @@ -88,12 +90,12 @@ static int read_block_bitmap(struct super_block *sb, kernel_lb_addr loc; loc.logicalBlockNum = bitmap->s_extPosition; - loc.partitionReferenceNum = UDF_SB_PARTITION(sb); + loc.partitionReferenceNum = UDF_SB(sb)->s_partition; bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block)); - if (!bh) { + if (!bh) retval = -EIO; - } + bitmap->s_block_bitmap[bitmap_nr] = bh; return retval; } @@ -138,6 +140,20 @@ static inline int load_block_bitmap(struct super_block *sb, return slot; } +static bool udf_add_free_space(struct udf_sb_info *sbi, + u16 partition, u32 cnt) +{ + struct logicalVolIntegrityDesc *lvid; + + if (sbi->s_lvid_bh == NULL) + return false; + + lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; + lvid->freeSpaceTable[partition] = cpu_to_le32(le32_to_cpu( + lvid->freeSpaceTable[partition]) + cnt); + return true; +} + static void udf_bitmap_free_blocks(struct super_block *sb, struct inode *inode, struct udf_bitmap *bitmap, @@ -155,57 +171,58 @@ static void udf_bitmap_free_blocks(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); if (bloc.logicalBlockNum < 0 || - (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) { + (bloc.logicalBlockNum + count) > + sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { udf_debug("%d < %d || %d + %d > %d\n", bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, - UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)); + sbi->s_partmaps[bloc.partitionReferenceNum]. 
+ s_partition_len); goto error_return; } - block = bloc.logicalBlockNum + offset + (sizeof(struct spaceBitmapDesc) << 3); + block = bloc.logicalBlockNum + offset + + (sizeof(struct spaceBitmapDesc) << 3); -do_more: - overflow = 0; - block_group = block >> (sb->s_blocksize_bits + 3); - bit = block % (sb->s_blocksize << 3); + do { + overflow = 0; + block_group = block >> (sb->s_blocksize_bits + 3); + bit = block % (sb->s_blocksize << 3); - /* - * Check to see if we are freeing blocks across a group boundary. - */ - if (bit + count > (sb->s_blocksize << 3)) { - overflow = bit + count - (sb->s_blocksize << 3); - count -= overflow; - } - bitmap_nr = load_block_bitmap(sb, bitmap, block_group); - if (bitmap_nr < 0) - goto error_return; + /* + * Check to see if we are freeing blocks across a group boundary. + */ + if (bit + count > (sb->s_blocksize << 3)) { + overflow = bit + count - (sb->s_blocksize << 3); + count -= overflow; + } + bitmap_nr = load_block_bitmap(sb, bitmap, block_group); + if (bitmap_nr < 0) + goto error_return; - bh = bitmap->s_block_bitmap[bitmap_nr]; - for (i = 0; i < count; i++) { - if (udf_set_bit(bit + i, bh->b_data)) { - udf_debug("bit %ld already set\n", bit + i); - udf_debug("byte=%2x\n", ((char *)bh->b_data)[(bit + i) >> 3]); - } else { - if (inode) - DQUOT_FREE_BLOCK(inode, 1); - if (UDF_SB_LVIDBH(sb)) { - UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]) + 1); + bh = bitmap->s_block_bitmap[bitmap_nr]; + for (i = 0; i < count; i++) { + if (udf_set_bit(bit + i, bh->b_data)) { + udf_debug("bit %ld already set\n", bit + i); + udf_debug("byte=%2x\n", + ((char *)bh->b_data)[(bit + i) >> 3]); + } else { + if (inode) + DQUOT_FREE_BLOCK(inode, 1); + udf_add_free_space(sbi, sbi->s_partition, 1); } } - } - mark_buffer_dirty(bh); - if (overflow) { - block += count; - count = overflow; - goto do_more; - } + mark_buffer_dirty(bh); + if (overflow) { + block += count; + 
count = overflow; + } + } while (overflow); + error_return: sb->s_dirt = 1; - if (UDF_SB_LVIDBH(sb)) - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); + if (sbi->s_lvid_bh) + mark_buffer_dirty(sbi->s_lvid_bh); mutex_unlock(&sbi->s_alloc_mutex); - return; } static int udf_bitmap_prealloc_blocks(struct super_block *sb, @@ -219,53 +236,50 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, int bit, block, block_group, group_start; int nr_groups, bitmap_nr; struct buffer_head *bh; + __u32 part_len; mutex_lock(&sbi->s_alloc_mutex); - if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) + part_len = sbi->s_partmaps[partition].s_partition_len; + if (first_block < 0 || first_block >= part_len) goto out; - if (first_block + block_count > UDF_SB_PARTLEN(sb, partition)) - block_count = UDF_SB_PARTLEN(sb, partition) - first_block; + if (first_block + block_count > part_len) + block_count = part_len - first_block; -repeat: - nr_groups = (UDF_SB_PARTLEN(sb, partition) + - (sizeof(struct spaceBitmapDesc) << 3) + - (sb->s_blocksize * 8) - 1) / (sb->s_blocksize * 8); - block = first_block + (sizeof(struct spaceBitmapDesc) << 3); - block_group = block >> (sb->s_blocksize_bits + 3); - group_start = block_group ? 0 : sizeof(struct spaceBitmapDesc); + do { + nr_groups = udf_compute_nr_groups(sb, partition); + block = first_block + (sizeof(struct spaceBitmapDesc) << 3); + block_group = block >> (sb->s_blocksize_bits + 3); + group_start = block_group ? 
0 : sizeof(struct spaceBitmapDesc); - bitmap_nr = load_block_bitmap(sb, bitmap, block_group); - if (bitmap_nr < 0) - goto out; - bh = bitmap->s_block_bitmap[bitmap_nr]; + bitmap_nr = load_block_bitmap(sb, bitmap, block_group); + if (bitmap_nr < 0) + goto out; + bh = bitmap->s_block_bitmap[bitmap_nr]; - bit = block % (sb->s_blocksize << 3); + bit = block % (sb->s_blocksize << 3); - while (bit < (sb->s_blocksize << 3) && block_count > 0) { - if (!udf_test_bit(bit, bh->b_data)) { - goto out; - } else if (DQUOT_PREALLOC_BLOCK(inode, 1)) { - goto out; - } else if (!udf_clear_bit(bit, bh->b_data)) { - udf_debug("bit already cleared for block %d\n", bit); - DQUOT_FREE_BLOCK(inode, 1); - goto out; + while (bit < (sb->s_blocksize << 3) && block_count > 0) { + if (!udf_test_bit(bit, bh->b_data)) + goto out; + else if (DQUOT_PREALLOC_BLOCK(inode, 1)) + goto out; + else if (!udf_clear_bit(bit, bh->b_data)) { + udf_debug("bit already cleared for block %d\n", bit); + DQUOT_FREE_BLOCK(inode, 1); + goto out; + } + block_count--; + alloc_count++; + bit++; + block++; } - block_count--; - alloc_count++; - bit++; - block++; - } - mark_buffer_dirty(bh); - if (block_count > 0) - goto repeat; + mark_buffer_dirty(bh); + } while (block_count > 0); + out: - if (UDF_SB_LVIDBH(sb)) { - UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - alloc_count); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); - } + if (udf_add_free_space(sbi, partition, -alloc_count)) + mark_buffer_dirty(sbi->s_lvid_bh); sb->s_dirt = 1; mutex_unlock(&sbi->s_alloc_mutex); return alloc_count; @@ -287,7 +301,7 @@ static int udf_bitmap_new_block(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); repeat: - if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) + if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; nr_groups = bitmap->s_nr_groups; @@ -312,14 +326,16 @@ repeat: if (bit < end_goal) goto got_block; - ptr = memscan((char 
*)bh->b_data + (bit >> 3), 0xFF, sb->s_blocksize - ((bit + 7) >> 3)); + ptr = memscan((char *)bh->b_data + (bit >> 3), 0xFF, + sb->s_blocksize - ((bit + 7) >> 3)); newbit = (ptr - ((char *)bh->b_data)) << 3; if (newbit < sb->s_blocksize << 3) { bit = newbit; goto search_back; } - newbit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, bit); + newbit = udf_find_next_one_bit(bh->b_data, + sb->s_blocksize << 3, bit); if (newbit < sb->s_blocksize << 3) { bit = newbit; goto got_block; @@ -358,15 +374,20 @@ repeat: if (bit < sb->s_blocksize << 3) goto search_back; else - bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, group_start << 3); + bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, + group_start << 3); if (bit >= sb->s_blocksize << 3) { mutex_unlock(&sbi->s_alloc_mutex); return 0; } search_back: - for (i = 0; i < 7 && bit > (group_start << 3) && udf_test_bit(bit - 1, bh->b_data); i++, bit--) - ; /* empty loop */ + i = 0; + while (i < 7 && bit > (group_start << 3) && + udf_test_bit(bit - 1, bh->b_data)) { + ++i; + --bit; + } got_block: @@ -389,11 +410,8 @@ got_block: mark_buffer_dirty(bh); - if (UDF_SB_LVIDBH(sb)) { - UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - 1); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); - } + if (udf_add_free_space(sbi, partition, -1)) + mark_buffer_dirty(sbi->s_lvid_bh); sb->s_dirt = 1; mutex_unlock(&sbi->s_alloc_mutex); *err = 0; @@ -418,56 +436,70 @@ static void udf_table_free_blocks(struct super_block *sb, struct extent_position oepos, epos; int8_t etype; int i; + struct udf_inode_info *iinfo; mutex_lock(&sbi->s_alloc_mutex); if (bloc.logicalBlockNum < 0 || - (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) { + (bloc.logicalBlockNum + count) > + sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { udf_debug("%d < %d || %d + %d > %d\n", bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, - 
UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)); + sbi->s_partmaps[bloc.partitionReferenceNum]. + s_partition_len); goto error_return; } - /* We do this up front - There are some error conditions that could occure, - but.. oh well */ + iinfo = UDF_I(table); + /* We do this up front - There are some error conditions that + could occure, but.. oh well */ if (inode) DQUOT_FREE_BLOCK(inode, count); - if (UDF_SB_LVIDBH(sb)) { - UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]) + count); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); - } + if (udf_add_free_space(sbi, sbi->s_partition, count)) + mark_buffer_dirty(sbi->s_lvid_bh); start = bloc.logicalBlockNum + offset; end = bloc.logicalBlockNum + offset + count - 1; epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry); elen = 0; - epos.block = oepos.block = UDF_I_LOCATION(table); + epos.block = oepos.block = iinfo->i_location; epos.bh = oepos.bh = NULL; while (count && (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { - if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == start)) { - if ((0x3FFFFFFF - elen) < (count << sb->s_blocksize_bits)) { - count -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - start += ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - elen = (etype << 30) | (0x40000000 - sb->s_blocksize); + if (((eloc.logicalBlockNum + + (elen >> sb->s_blocksize_bits)) == start)) { + if ((0x3FFFFFFF - elen) < + (count << sb->s_blocksize_bits)) { + uint32_t tmp = ((0x3FFFFFFF - elen) >> + sb->s_blocksize_bits); + count -= tmp; + start += tmp; + elen = (etype << 30) | + (0x40000000 - sb->s_blocksize); } else { - elen = (etype << 30) | (elen + (count << sb->s_blocksize_bits)); + elen = (etype << 30) | + (elen + + (count << sb->s_blocksize_bits)); start += count; count = 0; } udf_write_aext(table, &oepos, eloc, elen, 1); } else if (eloc.logicalBlockNum == (end + 1)) { - if ((0x3FFFFFFF - elen) < (count << 
sb->s_blocksize_bits)) { - count -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - end -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - eloc.logicalBlockNum -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - elen = (etype << 30) | (0x40000000 - sb->s_blocksize); + if ((0x3FFFFFFF - elen) < + (count << sb->s_blocksize_bits)) { + uint32_t tmp = ((0x3FFFFFFF - elen) >> + sb->s_blocksize_bits); + count -= tmp; + end -= tmp; + eloc.logicalBlockNum -= tmp; + elen = (etype << 30) | + (0x40000000 - sb->s_blocksize); } else { eloc.logicalBlockNum = start; - elen = (etype << 30) | (elen + (count << sb->s_blocksize_bits)); + elen = (etype << 30) | + (elen + + (count << sb->s_blocksize_bits)); end -= count; count = 0; } @@ -488,9 +520,9 @@ static void udf_table_free_blocks(struct super_block *sb, if (count) { /* - * NOTE: we CANNOT use udf_add_aext here, as it can try to allocate - * a new block, and since we hold the super block lock already - * very bad things would happen :) + * NOTE: we CANNOT use udf_add_aext here, as it can try to + * allocate a new block, and since we hold the super block + * lock already very bad things would happen :) * * We copy the behavior of udf_add_aext, but instead of * trying to allocate a new block close to the existing one, @@ -509,11 +541,11 @@ static void udf_table_free_blocks(struct super_block *sb, elen = EXT_RECORDED_ALLOCATED | (count << sb->s_blocksize_bits); - if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_SHORT) { + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - } else if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_LONG) { + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); - } else { + else { brelse(oepos.bh); brelse(epos.bh); goto error_return; @@ -531,56 +563,70 @@ static void udf_table_free_blocks(struct super_block *sb, eloc.logicalBlockNum++; elen -= sb->s_blocksize; - if (!(epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, epos.block, 0)))) { + epos.bh = udf_tread(sb, + 
udf_get_lb_pblock(sb, epos.block, 0)); + if (!epos.bh) { brelse(oepos.bh); goto error_return; } aed = (struct allocExtDesc *)(epos.bh->b_data); - aed->previousAllocExtLocation = cpu_to_le32(oepos.block.logicalBlockNum); + aed->previousAllocExtLocation = + cpu_to_le32(oepos.block.logicalBlockNum); if (epos.offset + adsize > sb->s_blocksize) { loffset = epos.offset; aed->lengthAllocDescs = cpu_to_le32(adsize); - sptr = UDF_I_DATA(table) + epos.offset - adsize; - dptr = epos.bh->b_data + sizeof(struct allocExtDesc); + sptr = iinfo->i_ext.i_data + epos.offset + - adsize; + dptr = epos.bh->b_data + + sizeof(struct allocExtDesc); memcpy(dptr, sptr, adsize); - epos.offset = sizeof(struct allocExtDesc) + adsize; + epos.offset = sizeof(struct allocExtDesc) + + adsize; } else { loffset = epos.offset + adsize; aed->lengthAllocDescs = cpu_to_le32(0); if (oepos.bh) { sptr = oepos.bh->b_data + epos.offset; - aed = (struct allocExtDesc *)oepos.bh->b_data; + aed = (struct allocExtDesc *) + oepos.bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + cpu_to_le32(le32_to_cpu( + aed->lengthAllocDescs) + + adsize); } else { - sptr = UDF_I_DATA(table) + epos.offset; - UDF_I_LENALLOC(table) += adsize; + sptr = iinfo->i_ext.i_data + + epos.offset; + iinfo->i_lenAlloc += adsize; mark_inode_dirty(table); } epos.offset = sizeof(struct allocExtDesc); } - if (UDF_SB_UDFREV(sb) >= 0x0200) - udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1, - epos.block.logicalBlockNum, sizeof(tag)); + if (sbi->s_udfrev >= 0x0200) + udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, + 3, 1, epos.block.logicalBlockNum, + sizeof(tag)); else - udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 2, 1, - epos.block.logicalBlockNum, sizeof(tag)); - - switch (UDF_I_ALLOCTYPE(table)) { - case ICBTAG_FLAG_AD_SHORT: - sad = (short_ad *)sptr; - sad->extLength = cpu_to_le32( - EXT_NEXT_EXTENT_ALLOCDECS | - sb->s_blocksize); - sad->extPosition = cpu_to_le32(epos.block.logicalBlockNum); - break; - 
case ICBTAG_FLAG_AD_LONG: - lad = (long_ad *)sptr; - lad->extLength = cpu_to_le32( - EXT_NEXT_EXTENT_ALLOCDECS | - sb->s_blocksize); - lad->extLocation = cpu_to_lelb(epos.block); - break; + udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, + 2, 1, epos.block.logicalBlockNum, + sizeof(tag)); + + switch (iinfo->i_alloc_type) { + case ICBTAG_FLAG_AD_SHORT: + sad = (short_ad *)sptr; + sad->extLength = cpu_to_le32( + EXT_NEXT_EXTENT_ALLOCDECS | + sb->s_blocksize); + sad->extPosition = + cpu_to_le32(epos.block.logicalBlockNum); + break; + case ICBTAG_FLAG_AD_LONG: + lad = (long_ad *)sptr; + lad->extLength = cpu_to_le32( + EXT_NEXT_EXTENT_ALLOCDECS | + sb->s_blocksize); + lad->extLocation = + cpu_to_lelb(epos.block); + break; } if (oepos.bh) { udf_update_tag(oepos.bh->b_data, loffset); @@ -590,16 +636,18 @@ static void udf_table_free_blocks(struct super_block *sb, } } - if (elen) { /* It's possible that stealing the block emptied the extent */ + /* It's possible that stealing the block emptied the extent */ + if (elen) { udf_write_aext(table, &epos, eloc, elen, 1); if (!epos.bh) { - UDF_I_LENALLOC(table) += adsize; + iinfo->i_lenAlloc += adsize; mark_inode_dirty(table); } else { aed = (struct allocExtDesc *)epos.bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + cpu_to_le32(le32_to_cpu( + aed->lengthAllocDescs) + adsize); udf_update_tag(epos.bh->b_data, epos.offset); mark_buffer_dirty(epos.bh); } @@ -626,20 +674,23 @@ static int udf_table_prealloc_blocks(struct super_block *sb, kernel_lb_addr eloc; struct extent_position epos; int8_t etype = -1; + struct udf_inode_info *iinfo; - if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) + if (first_block < 0 || + first_block >= sbi->s_partmaps[partition].s_partition_len) return 0; - if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_SHORT) + iinfo = UDF_I(table); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(table) == 
ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else return 0; mutex_lock(&sbi->s_alloc_mutex); epos.offset = sizeof(struct unallocSpaceEntry); - epos.block = UDF_I_LOCATION(table); + epos.block = iinfo->i_location; epos.bh = NULL; eloc.logicalBlockNum = 0xFFFFFFFF; @@ -654,26 +705,26 @@ static int udf_table_prealloc_blocks(struct super_block *sb, epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); - if (inode && DQUOT_PREALLOC_BLOCK(inode, alloc_count > block_count ? block_count : alloc_count)) { + if (inode && DQUOT_PREALLOC_BLOCK(inode, + alloc_count > block_count ? block_count : alloc_count)) alloc_count = 0; - } else if (alloc_count > block_count) { + else if (alloc_count > block_count) { alloc_count = block_count; eloc.logicalBlockNum += alloc_count; elen -= (alloc_count << sb->s_blocksize_bits); - udf_write_aext(table, &epos, eloc, (etype << 30) | elen, 1); - } else { - udf_delete_aext(table, epos, eloc, (etype << 30) | elen); - } + udf_write_aext(table, &epos, eloc, + (etype << 30) | elen, 1); + } else + udf_delete_aext(table, epos, eloc, + (etype << 30) | elen); } else { alloc_count = 0; } brelse(epos.bh); - if (alloc_count && UDF_SB_LVIDBH(sb)) { - UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - alloc_count); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); + if (alloc_count && udf_add_free_space(sbi, partition, -alloc_count)) { + mark_buffer_dirty(sbi->s_lvid_bh); sb->s_dirt = 1; } mutex_unlock(&sbi->s_alloc_mutex); @@ -692,33 +743,35 @@ static int udf_table_new_block(struct super_block *sb, kernel_lb_addr eloc, uninitialized_var(goal_eloc); struct extent_position epos, goal_epos; int8_t etype; + struct udf_inode_info *iinfo = UDF_I(table); *err = -ENOSPC; - if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(table) == 
ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else return newblock; mutex_lock(&sbi->s_alloc_mutex); - if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) + if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; - /* We search for the closest matching block to goal. If we find a exact hit, - we stop. Otherwise we keep going till we run out of extents. - We store the buffer_head, bloc, and extoffset of the current closest - match and use that when we are done. + /* We search for the closest matching block to goal. If we find + a exact hit, we stop. Otherwise we keep going till we run out + of extents. We store the buffer_head, bloc, and extoffset + of the current closest match and use that when we are done. */ epos.offset = sizeof(struct unallocSpaceEntry); - epos.block = UDF_I_LOCATION(table); + epos.block = iinfo->i_location; epos.bh = goal_epos.bh = NULL; while (spread && (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { if (goal >= eloc.logicalBlockNum) { - if (goal < eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) + if (goal < eloc.logicalBlockNum + + (elen >> sb->s_blocksize_bits)) nspread = 0; else nspread = goal - eloc.logicalBlockNum - @@ -771,11 +824,8 @@ static int udf_table_new_block(struct super_block *sb, udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); brelse(goal_epos.bh); - if (UDF_SB_LVIDBH(sb)) { - UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - 1); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); - } + if (udf_add_free_space(sbi, partition, -1)) + mark_buffer_dirty(sbi->s_lvid_bh); sb->s_dirt = 1; mutex_unlock(&sbi->s_alloc_mutex); @@ -789,22 +839,23 @@ inline void udf_free_blocks(struct super_block *sb, uint32_t count) { uint16_t partition = bloc.partitionReferenceNum; + struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; - if (UDF_SB_PARTFLAGS(sb, partition) & 
UDF_PART_FLAG_UNALLOC_BITMAP) { + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { return udf_bitmap_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, + map->s_uspace.s_bitmap, bloc, offset, count); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) { + } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { return udf_table_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table, + map->s_uspace.s_table, bloc, offset, count); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) { + } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { return udf_bitmap_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap, + map->s_fspace.s_bitmap, bloc, offset, count); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) { + } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { return udf_table_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table, + map->s_fspace.s_table, bloc, offset, count); } else { return; @@ -816,51 +867,55 @@ inline int udf_prealloc_blocks(struct super_block *sb, uint16_t partition, uint32_t first_block, uint32_t block_count) { - if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) { + struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; + + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) return udf_bitmap_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, - partition, first_block, block_count); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) { + map->s_uspace.s_bitmap, + partition, first_block, + block_count); + else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) return udf_table_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table, - partition, first_block, block_count); - } else if (UDF_SB_PARTFLAGS(sb, partition) & 
UDF_PART_FLAG_FREED_BITMAP) { + map->s_uspace.s_table, + partition, first_block, + block_count); + else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) return udf_bitmap_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap, - partition, first_block, block_count); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) { + map->s_fspace.s_bitmap, + partition, first_block, + block_count); + else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) return udf_table_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table, - partition, first_block, block_count); - } else { + map->s_fspace.s_table, + partition, first_block, + block_count); + else return 0; - } } inline int udf_new_block(struct super_block *sb, struct inode *inode, uint16_t partition, uint32_t goal, int *err) { - int ret; + struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; - if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) { - ret = udf_bitmap_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) + return udf_bitmap_new_block(sb, inode, + map->s_uspace.s_bitmap, partition, goal, err); - return ret; - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) { + else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) return udf_table_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table, + map->s_uspace.s_table, partition, goal, err); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) { + else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) return udf_bitmap_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap, + map->s_fspace.s_bitmap, partition, goal, err); - } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) { + else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) return udf_table_new_block(sb, 
inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table, + map->s_fspace.s_table, partition, goal, err); - } else { + else { *err = -EIO; return 0; } diff --git a/fs/udf/crc.c b/fs/udf/crc.c index 85aaee5fab26..b1661296e786 100644 --- a/fs/udf/crc.c +++ b/fs/udf/crc.c @@ -79,7 +79,7 @@ static uint16_t crc_table[256] = { * July 21, 1997 - Andrew E. Mileski * Adapted from OSTA-UDF(tm) 1.50 standard. */ -uint16_t udf_crc(uint8_t * data, uint32_t size, uint16_t crc) +uint16_t udf_crc(uint8_t *data, uint32_t size, uint16_t crc) { while (size--) crc = crc_table[(crc >> 8 ^ *(data++)) & 0xffU] ^ (crc << 8); diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 9e3b9f97ddbc..8d8643ada199 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -36,80 +36,20 @@ #include "udf_i.h" #include "udf_sb.h" -/* Prototypes for file operations */ -static int udf_readdir(struct file *, void *, filldir_t); -static int do_udf_readdir(struct inode *, struct file *, filldir_t, void *); - -/* readdir and lookup functions */ - -const struct file_operations udf_dir_operations = { - .read = generic_read_dir, - .readdir = udf_readdir, - .ioctl = udf_ioctl, - .fsync = udf_fsync_file, -}; - -/* - * udf_readdir - * - * PURPOSE - * Read a directory entry. - * - * DESCRIPTION - * Optional - sys_getdents() will return -ENOTDIR if this routine is not - * available. - * - * Refer to sys_getdents() in fs/readdir.c - * sys_getdents() -> . - * - * PRE-CONDITIONS - * filp Pointer to directory file. - * buf Pointer to directory entry buffer. - * filldir Pointer to filldir function. - * - * POST-CONDITIONS - * <return> >=0 on success. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. 
- */ - -int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - struct inode *dir = filp->f_path.dentry->d_inode; - int result; - - lock_kernel(); - - if (filp->f_pos == 0) { - if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) { - unlock_kernel(); - return 0; - } - filp->f_pos++; - } - - result = do_udf_readdir(dir, filp, filldir, dirent); - unlock_kernel(); - return result; -} - -static int -do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, - void *dirent) +static int do_udf_readdir(struct inode *dir, struct file *filp, + filldir_t filldir, void *dirent) { struct udf_fileident_bh fibh; struct fileIdentDesc *fi = NULL; struct fileIdentDesc cfi; int block, iblock; - loff_t nf_pos = filp->f_pos - 1; + loff_t nf_pos = (filp->f_pos - 1) << 2; int flen; char fname[UDF_NAME_LEN]; char *nameptr; uint16_t liu; uint8_t lfi; - loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; + loff_t size = udf_ext0_offset(dir) + dir->i_size; struct buffer_head *tmp, *bha[16]; kernel_lb_addr eloc; uint32_t elen; @@ -117,23 +57,26 @@ do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, int i, num; unsigned int dt_type; struct extent_position epos = { NULL, 0, {0, 0} }; + struct udf_inode_info *iinfo; if (nf_pos >= size) return 0; if (nf_pos == 0) - nf_pos = (udf_ext0_offset(dir) >> 2); + nf_pos = udf_ext0_offset(dir); - fibh.soffset = fibh.eoffset = (nf_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); + iinfo = UDF_I(dir); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { fibh.sbh = fibh.ebh = NULL; - } else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2), + } else if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << 
dir->i_sb->s_blocksize_bits) < elen) { - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == + ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); } else { offset = 0; @@ -168,7 +111,7 @@ do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, } while (nf_pos < size) { - filp->f_pos = nf_pos + 1; + filp->f_pos = (nf_pos >> 2) + 1; fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); @@ -235,7 +178,7 @@ do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, } } /* end while */ - filp->f_pos = nf_pos + 1; + filp->f_pos = (nf_pos >> 2) + 1; if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -244,3 +187,57 @@ do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, return 0; } + +/* + * udf_readdir + * + * PURPOSE + * Read a directory entry. + * + * DESCRIPTION + * Optional - sys_getdents() will return -ENOTDIR if this routine is not + * available. + * + * Refer to sys_getdents() in fs/readdir.c + * sys_getdents() -> . + * + * PRE-CONDITIONS + * filp Pointer to directory file. + * buf Pointer to directory entry buffer. + * filldir Pointer to filldir function. + * + * POST-CONDITIONS + * <return> >=0 on success. + * + * HISTORY + * July 1, 1997 - Andrew E. Mileski + * Written, tested, and released. 
+ */ + +static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *dir = filp->f_path.dentry->d_inode; + int result; + + lock_kernel(); + + if (filp->f_pos == 0) { + if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) { + unlock_kernel(); + return 0; + } + filp->f_pos++; + } + + result = do_udf_readdir(dir, filp, filldir, dirent); + unlock_kernel(); + return result; +} + +/* readdir and lookup functions */ +const struct file_operations udf_dir_operations = { + .read = generic_read_dir, + .readdir = udf_readdir, + .ioctl = udf_ioctl, + .fsync = udf_fsync_file, +}; diff --git a/fs/udf/directory.c b/fs/udf/directory.c index ff8c08fd7bf5..2820f8fcf4cc 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -19,7 +19,7 @@ #include <linux/buffer_head.h> #if 0 -static uint8_t *udf_filead_read(struct inode *dir, uint8_t * tmpad, +static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size, kernel_lb_addr fe_loc, int *pos, int *offset, struct buffer_head **bh, int *error) @@ -45,7 +45,8 @@ static uint8_t *udf_filead_read(struct inode *dir, uint8_t * tmpad, block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); if (!block) return NULL; - if (!(*bh = udf_tread(dir->i_sb, block))) + *bh = udf_tread(dir->i_sb, block); + if (!*bh) return NULL; } else if (*offset > dir->i_sb->s_blocksize) { ad = tmpad; @@ -57,10 +58,12 @@ static uint8_t *udf_filead_read(struct inode *dir, uint8_t * tmpad, block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); if (!block) return NULL; - if (!((*bh) = udf_tread(dir->i_sb, block))) + (*bh) = udf_tread(dir->i_sb, block); + if (!*bh) return NULL; - memcpy((uint8_t *)ad + remainder, (*bh)->b_data, ad_size - remainder); + memcpy((uint8_t *)ad + remainder, (*bh)->b_data, + ad_size - remainder); *offset = ad_size - remainder; } @@ -68,29 +71,31 @@ static uint8_t *udf_filead_read(struct inode *dir, uint8_t * tmpad, } #endif -struct fileIdentDesc *udf_fileident_read(struct inode *dir, 
loff_t * nf_pos, +struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, struct udf_fileident_bh *fibh, struct fileIdentDesc *cfi, struct extent_position *epos, - kernel_lb_addr * eloc, uint32_t * elen, - sector_t * offset) + kernel_lb_addr *eloc, uint32_t *elen, + sector_t *offset) { struct fileIdentDesc *fi; int i, num, block; struct buffer_head *tmp, *bha[16]; + struct udf_inode_info *iinfo = UDF_I(dir); fibh->soffset = fibh->eoffset; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { - fi = udf_get_fileident(UDF_I_DATA(dir) - - (UDF_I_EFE(dir) ? + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + fi = udf_get_fileident(iinfo->i_ext.i_data - + (iinfo->i_efe ? sizeof(struct extendedFileEntry) : sizeof(struct fileEntry)), - dir->i_sb->s_blocksize, &(fibh->eoffset)); + dir->i_sb->s_blocksize, + &(fibh->eoffset)); if (!fi) return NULL; - *nf_pos += ((fibh->eoffset - fibh->soffset) >> 2); + *nf_pos += fibh->eoffset - fibh->soffset; memcpy((uint8_t *)cfi, (uint8_t *)fi, sizeof(struct fileIdentDesc)); @@ -100,6 +105,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t * nf_pos, if (fibh->eoffset == dir->i_sb->s_blocksize) { int lextoffset = epos->offset; + unsigned char blocksize_bits = dir->i_sb->s_blocksize_bits; if (udf_next_aext(dir, epos, eloc, elen, 1) != (EXT_RECORDED_ALLOCATED >> 30)) @@ -109,24 +115,27 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t * nf_pos, (*offset)++; - if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) + if ((*offset << blocksize_bits) >= *elen) *offset = 0; else epos->offset = lextoffset; brelse(fibh->sbh); - if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) + fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); + if (!fibh->sbh) return NULL; fibh->soffset = fibh->eoffset = 0; - if (!(*offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) { - i = 16 >> (dir->i_sb->s_blocksize_bits - 9); - if (i + *offset > (*elen >> dir->i_sb->s_blocksize_bits)) - i 
= (*elen >> dir->i_sb->s_blocksize_bits)-*offset; + if (!(*offset & ((16 >> (blocksize_bits - 9)) - 1))) { + i = 16 >> (blocksize_bits - 9); + if (i + *offset > (*elen >> blocksize_bits)) + i = (*elen >> blocksize_bits)-*offset; for (num = 0; i > 0; i--) { - block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset + i); + block = udf_get_lb_pblock(dir->i_sb, *eloc, + *offset + i); tmp = udf_tgetblk(dir->i_sb, block); - if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp)) + if (tmp && !buffer_uptodate(tmp) && + !buffer_locked(tmp)) bha[num++] = tmp; else brelse(tmp); @@ -148,7 +157,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t * nf_pos, if (!fi) return NULL; - *nf_pos += ((fibh->eoffset - fibh->soffset) >> 2); + *nf_pos += fibh->eoffset - fibh->soffset; if (fibh->eoffset <= dir->i_sb->s_blocksize) { memcpy((uint8_t *)cfi, (uint8_t *)fi, @@ -172,20 +181,23 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t * nf_pos, fibh->soffset -= dir->i_sb->s_blocksize; fibh->eoffset -= dir->i_sb->s_blocksize; - if (!(fibh->ebh = udf_tread(dir->i_sb, block))) + fibh->ebh = udf_tread(dir->i_sb, block); + if (!fibh->ebh) return NULL; if (sizeof(struct fileIdentDesc) > -fibh->soffset) { int fi_len; memcpy((uint8_t *)cfi, (uint8_t *)fi, -fibh->soffset); - memcpy((uint8_t *)cfi - fibh->soffset, fibh->ebh->b_data, + memcpy((uint8_t *)cfi - fibh->soffset, + fibh->ebh->b_data, sizeof(struct fileIdentDesc) + fibh->soffset); - fi_len = (sizeof(struct fileIdentDesc) + cfi->lengthFileIdent + + fi_len = (sizeof(struct fileIdentDesc) + + cfi->lengthFileIdent + le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3; - *nf_pos += ((fi_len - (fibh->eoffset - fibh->soffset)) >> 2); + *nf_pos += fi_len - (fibh->eoffset - fibh->soffset); fibh->eoffset = fibh->soffset + fi_len; } else { memcpy((uint8_t *)cfi, (uint8_t *)fi, @@ -210,11 +222,10 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset) ptr = buffer; - if ((*offset > 0) && (*offset 
< bufsize)) { + if ((*offset > 0) && (*offset < bufsize)) ptr += *offset; - } fi = (struct fileIdentDesc *)ptr; - if (le16_to_cpu(fi->descTag.tagIdent) != TAG_IDENT_FID) { + if (fi->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FID)) { udf_debug("0x%x != TAG_IDENT_FID\n", le16_to_cpu(fi->descTag.tagIdent)); udf_debug("offset: %u sizeof: %lu bufsize: %u\n", @@ -222,12 +233,11 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset) bufsize); return NULL; } - if ((*offset + sizeof(struct fileIdentDesc)) > bufsize) { + if ((*offset + sizeof(struct fileIdentDesc)) > bufsize) lengthThisIdent = sizeof(struct fileIdentDesc); - } else { + else lengthThisIdent = sizeof(struct fileIdentDesc) + fi->lengthFileIdent + le16_to_cpu(fi->lengthOfImpUse); - } /* we need to figure padding, too! */ padlen = lengthThisIdent % UDF_NAME_PAD; @@ -252,17 +262,17 @@ static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) fe = (struct fileEntry *)buffer; - if (le16_to_cpu(fe->descTag.tagIdent) != TAG_IDENT_FE) { + if (fe->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FE)) { udf_debug("0x%x != TAG_IDENT_FE\n", le16_to_cpu(fe->descTag.tagIdent)); return NULL; } - ptr = (uint8_t *)(fe->extendedAttr) + le32_to_cpu(fe->lengthExtendedAttr); + ptr = (uint8_t *)(fe->extendedAttr) + + le32_to_cpu(fe->lengthExtendedAttr); - if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) { + if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) ptr += *offset; - } ext = (extent_ad *)ptr; @@ -271,7 +281,7 @@ static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) } #endif -short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, int *offset, +short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc) { short_ad *sa; @@ -281,17 +291,20 @@ short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, int *offset, return NULL; } - if ((*offset < 0) || ((*offset + sizeof(short_ad)) > maxoffset)) - return 
NULL; - else if ((sa = (short_ad *)ptr)->extLength == 0) + if ((*offset + sizeof(short_ad)) > maxoffset) return NULL; + else { + sa = (short_ad *)ptr; + if (sa->extLength == 0) + return NULL; + } if (inc) *offset += sizeof(short_ad); return sa; } -long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, int *offset, int inc) +long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc) { long_ad *la; @@ -300,10 +313,13 @@ long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, int *offset, int inc) return NULL; } - if ((*offset < 0) || ((*offset + sizeof(long_ad)) > maxoffset)) - return NULL; - else if ((la = (long_ad *)ptr)->extLength == 0) + if ((*offset + sizeof(long_ad)) > maxoffset) return NULL; + else { + la = (long_ad *)ptr; + if (la->extLength == 0) + return NULL; + } if (inc) *offset += sizeof(long_ad); diff --git a/fs/udf/file.c b/fs/udf/file.c index 7c7a1b39d56c..97c71ae7c689 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -45,12 +45,13 @@ static int udf_adinicb_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); BUG_ON(!PageLocked(page)); kaddr = kmap(page); memset(kaddr, 0, PAGE_CACHE_SIZE); - memcpy(kaddr, UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), inode->i_size); + memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); @@ -59,15 +60,17 @@ static int udf_adinicb_readpage(struct file *file, struct page *page) return 0; } -static int udf_adinicb_writepage(struct page *page, struct writeback_control *wbc) +static int udf_adinicb_writepage(struct page *page, + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); BUG_ON(!PageLocked(page)); kaddr = kmap(page); - memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), kaddr, inode->i_size); + memcpy(iinfo->i_ext.i_data + 
iinfo->i_lenEAttr, kaddr, inode->i_size); mark_inode_dirty(inode); SetPageUptodate(page); kunmap(page); @@ -84,9 +87,10 @@ static int udf_adinicb_write_end(struct file *file, struct inode *inode = mapping->host; unsigned offset = pos & (PAGE_CACHE_SIZE - 1); char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); kaddr = kmap_atomic(page, KM_USER0); - memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, + memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, kaddr + offset, copied); kunmap_atomic(kaddr, KM_USER0); @@ -109,25 +113,27 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, struct inode *inode = file->f_path.dentry->d_inode; int err, pos; size_t count = iocb->ki_left; + struct udf_inode_info *iinfo = UDF_I(inode); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) pos = inode->i_size; else pos = ppos; - if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + + if (inode->i_sb->s_blocksize < + (udf_file_entry_alloc_offset(inode) + pos + count)) { udf_expand_file_adinicb(inode, pos + count, &err); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { udf_debug("udf_expand_adinicb: err=%d\n", err); return err; } } else { if (pos + count > inode->i_size) - UDF_I_LENALLOC(inode) = pos + count; + iinfo->i_lenAlloc = pos + count; else - UDF_I_LENALLOC(inode) = inode->i_size; + iinfo->i_lenAlloc = inode->i_size; } } @@ -191,23 +197,28 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, switch (cmd) { case UDF_GETVOLIDENT: - return copy_to_user((char __user *)arg, - UDF_SB_VOLIDENT(inode->i_sb), 32) ? 
-EFAULT : 0; + if (copy_to_user((char __user *)arg, + UDF_SB(inode->i_sb)->s_volume_ident, 32)) + return -EFAULT; + else + return 0; case UDF_RELOCATE_BLOCKS: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (get_user(old_block, (long __user *)arg)) return -EFAULT; - if ((result = udf_relocate_blocks(inode->i_sb, - old_block, &new_block)) == 0) + result = udf_relocate_blocks(inode->i_sb, + old_block, &new_block); + if (result == 0) result = put_user(new_block, (long __user *)arg); return result; case UDF_GETEASIZE: - result = put_user(UDF_I_LENEATTR(inode), (int __user *)arg); + result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg); break; case UDF_GETEABLOCK: - result = copy_to_user((char __user *)arg, UDF_I_DATA(inode), - UDF_I_LENEATTR(inode)) ? -EFAULT : 0; + result = copy_to_user((char __user *)arg, + UDF_I(inode)->i_ext.i_data, + UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0; break; } diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 636d8f613929..84360315aca2 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -43,19 +43,21 @@ void udf_free_inode(struct inode *inode) clear_inode(inode); mutex_lock(&sbi->s_alloc_mutex); - if (sbi->s_lvidbh) { + if (sbi->s_lvid_bh) { + struct logicalVolIntegrityDescImpUse *lvidiu = + udf_sb_lvidiu(sbi); if (S_ISDIR(inode->i_mode)) - UDF_SB_LVIDIU(sb)->numDirs = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) - 1); + lvidiu->numDirs = + cpu_to_le32(le32_to_cpu(lvidiu->numDirs) - 1); else - UDF_SB_LVIDIU(sb)->numFiles = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); + lvidiu->numFiles = + cpu_to_le32(le32_to_cpu(lvidiu->numFiles) - 1); - mark_buffer_dirty(sbi->s_lvidbh); + mark_buffer_dirty(sbi->s_lvid_bh); } mutex_unlock(&sbi->s_alloc_mutex); - udf_free_blocks(sb, NULL, UDF_I_LOCATION(inode), 0, 1); + udf_free_blocks(sb, NULL, UDF_I(inode)->i_location, 0, 1); } struct inode *udf_new_inode(struct inode *dir, int mode, int *err) @@ -64,7 +66,9 @@ struct inode *udf_new_inode(struct inode *dir, int mode, 
int *err) struct udf_sb_info *sbi = UDF_SB(sb); struct inode *inode; int block; - uint32_t start = UDF_I_LOCATION(dir).logicalBlockNum; + uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; + struct udf_inode_info *iinfo; + struct udf_inode_info *dinfo = UDF_I(dir); inode = new_inode(sb); @@ -74,13 +78,15 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) } *err = -ENOSPC; - UDF_I_UNIQUE(inode) = 0; - UDF_I_LENEXTENTS(inode) = 0; - UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; - UDF_I_NEXT_ALLOC_GOAL(inode) = 0; - UDF_I_STRAT4096(inode) = 0; + iinfo = UDF_I(inode); + iinfo->i_unique = 0; + iinfo->i_lenExtents = 0; + iinfo->i_next_alloc_block = 0; + iinfo->i_next_alloc_goal = 0; + iinfo->i_strat4096 = 0; - block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum, + block = udf_new_block(dir->i_sb, NULL, + dinfo->i_location.partitionReferenceNum, start, err); if (*err) { iput(inode); @@ -88,21 +94,27 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) } mutex_lock(&sbi->s_alloc_mutex); - if (UDF_SB_LVIDBH(sb)) { + if (sbi->s_lvid_bh) { + struct logicalVolIntegrityDesc *lvid = + (struct logicalVolIntegrityDesc *) + sbi->s_lvid_bh->b_data; + struct logicalVolIntegrityDescImpUse *lvidiu = + udf_sb_lvidiu(sbi); struct logicalVolHeaderDesc *lvhd; uint64_t uniqueID; - lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(sb)->logicalVolContentsUse); + lvhd = (struct logicalVolHeaderDesc *) + (lvid->logicalVolContentsUse); if (S_ISDIR(mode)) - UDF_SB_LVIDIU(sb)->numDirs = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) + 1); + lvidiu->numDirs = + cpu_to_le32(le32_to_cpu(lvidiu->numDirs) + 1); else - UDF_SB_LVIDIU(sb)->numFiles = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + 1); - UDF_I_UNIQUE(inode) = uniqueID = le64_to_cpu(lvhd->uniqueID); + lvidiu->numFiles = + cpu_to_le32(le32_to_cpu(lvidiu->numFiles) + 1); + iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID); if (!(++uniqueID & 
0x00000000FFFFFFFFUL)) uniqueID += 16; lvhd->uniqueID = cpu_to_le64(uniqueID); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); + mark_buffer_dirty(sbi->s_lvid_bh); } inode->i_mode = mode; inode->i_uid = current->fsuid; @@ -114,35 +126,41 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) inode->i_gid = current->fsgid; } - UDF_I_LOCATION(inode).logicalBlockNum = block; - UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; - inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0); + iinfo->i_location.logicalBlockNum = block; + iinfo->i_location.partitionReferenceNum = + dinfo->i_location.partitionReferenceNum; + inode->i_ino = udf_get_lb_pblock(sb, iinfo->i_location, 0); inode->i_blocks = 0; - UDF_I_LENEATTR(inode) = 0; - UDF_I_LENALLOC(inode) = 0; - UDF_I_USE(inode) = 0; + iinfo->i_lenEAttr = 0; + iinfo->i_lenAlloc = 0; + iinfo->i_use = 0; if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { - UDF_I_EFE(inode) = 1; - UDF_UPDATE_UDFREV(inode->i_sb, UDF_VERS_USE_EXTENDED_FE); - UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL); + iinfo->i_efe = 1; + if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) + sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; + iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry), + GFP_KERNEL); } else { - UDF_I_EFE(inode) = 0; - UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); + iinfo->i_efe = 0; + iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - + sizeof(struct fileEntry), + GFP_KERNEL); } - if (!UDF_I_DATA(inode)) { + if (!iinfo->i_ext.i_data) { iput(inode); *err = -ENOMEM; mutex_unlock(&sbi->s_alloc_mutex); return NULL; } if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) - UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) - 
UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_SHORT; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; else - UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_LONG; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; inode->i_mtime = inode->i_atime = inode->i_ctime = - UDF_I_CRTIME(inode) = current_fs_time(inode->i_sb); + iinfo->i_crtime = current_fs_time(inode->i_sb); insert_inode_hash(inode); mark_inode_dirty(inode); mutex_unlock(&sbi->s_alloc_mutex); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6ff8151984cf..24cfa55d0fdc 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -19,7 +19,8 @@ * 10/04/98 dgb Added rudimentary directory functions * 10/07/98 Fully working udf_block_map! It works! * 11/25/98 bmap altered to better support extents - * 12/06/98 blf partition support in udf_iget, udf_block_map and udf_read_inode + * 12/06/98 blf partition support in udf_iget, udf_block_map + * and udf_read_inode * 12/12/98 rewrote udf_block_map to handle next extents and descs across * block boundaries (which is not actually allowed) * 12/20/98 added support for strategy 4096 @@ -51,7 +52,7 @@ static int udf_update_inode(struct inode *, int); static void udf_fill_inode(struct inode *, struct buffer_head *); static int udf_alloc_i_data(struct inode *inode, size_t size); static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, - long *, int *); + sector_t *, int *); static int8_t udf_insert_aext(struct inode *, struct extent_position, kernel_lb_addr, uint32_t); static void udf_split_extents(struct inode *, int *, int, int, @@ -111,16 +112,18 @@ no_delete: */ void udf_clear_inode(struct inode *inode) { + struct udf_inode_info *iinfo; if (!(inode->i_sb->s_flags & MS_RDONLY)) { lock_kernel(); /* Discard preallocation for directories, symlinks, etc. 
*/ udf_discard_prealloc(inode); udf_truncate_tail_extent(inode); unlock_kernel(); - write_inode_now(inode, 1); + write_inode_now(inode, 0); } - kfree(UDF_I_DATA(inode)); - UDF_I_DATA(inode) = NULL; + iinfo = UDF_I(inode); + kfree(iinfo->i_ext.i_data); + iinfo->i_ext.i_data = NULL; } static int udf_writepage(struct page *page, struct writeback_control *wbc) @@ -160,6 +163,7 @@ void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err) { struct page *page; char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); struct writeback_control udf_wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = 1, @@ -168,11 +172,11 @@ void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err) /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; - if (!UDF_I_LENALLOC(inode)) { + if (!iinfo->i_lenAlloc) { if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) - UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_SHORT; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; else - UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_LONG; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; mark_inode_dirty(inode); return; } @@ -182,21 +186,21 @@ void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err) if (!PageUptodate(page)) { kaddr = kmap(page); - memset(kaddr + UDF_I_LENALLOC(inode), 0x00, - PAGE_CACHE_SIZE - UDF_I_LENALLOC(inode)); - memcpy(kaddr, UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), - UDF_I_LENALLOC(inode)); + memset(kaddr + iinfo->i_lenAlloc, 0x00, + PAGE_CACHE_SIZE - iinfo->i_lenAlloc); + memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, + iinfo->i_lenAlloc); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); } - memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0x00, - UDF_I_LENALLOC(inode)); - UDF_I_LENALLOC(inode) = 0; + memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, + iinfo->i_lenAlloc); + iinfo->i_lenAlloc = 0; if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) - UDF_I_ALLOCTYPE(inode) = 
ICBTAG_FLAG_AD_SHORT; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; else - UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_LONG; + iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; inode->i_data.a_ops->writepage(page, &udf_wbc); page_cache_release(page); @@ -215,9 +219,10 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, struct extent_position epos; struct udf_fileident_bh sfibh, dfibh; - loff_t f_pos = udf_ext0_offset(inode) >> 2; - int size = (udf_ext0_offset(inode) + inode->i_size) >> 2; + loff_t f_pos = udf_ext0_offset(inode); + int size = udf_ext0_offset(inode) + inode->i_size; struct fileIdentDesc cfi, *sfi, *dfi; + struct udf_inode_info *iinfo = UDF_I(inode); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) alloctype = ICBTAG_FLAG_AD_SHORT; @@ -225,19 +230,20 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, alloctype = ICBTAG_FLAG_AD_LONG; if (!inode->i_size) { - UDF_I_ALLOCTYPE(inode) = alloctype; + iinfo->i_alloc_type = alloctype; mark_inode_dirty(inode); return NULL; } /* alloc block, and copy data to it */ *block = udf_new_block(inode->i_sb, inode, - UDF_I_LOCATION(inode).partitionReferenceNum, - UDF_I_LOCATION(inode).logicalBlockNum, err); + iinfo->i_location.partitionReferenceNum, + iinfo->i_location.logicalBlockNum, err); if (!(*block)) return NULL; newblock = udf_get_pblock(inode->i_sb, *block, - UDF_I_LOCATION(inode).partitionReferenceNum, 0); + iinfo->i_location.partitionReferenceNum, + 0); if (!newblock) return NULL; dbh = udf_tgetblk(inode->i_sb, newblock); @@ -249,39 +255,44 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, unlock_buffer(dbh); mark_buffer_dirty_inode(dbh, inode); - sfibh.soffset = sfibh.eoffset = (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2; + sfibh.soffset = sfibh.eoffset = + f_pos & (inode->i_sb->s_blocksize - 1); sfibh.sbh = sfibh.ebh = NULL; dfibh.soffset = dfibh.eoffset = 0; dfibh.sbh = dfibh.ebh = dbh; - while ((f_pos < size)) { - 
UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; - sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL); + while (f_pos < size) { + iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; + sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, + NULL, NULL, NULL); if (!sfi) { brelse(dbh); return NULL; } - UDF_I_ALLOCTYPE(inode) = alloctype; + iinfo->i_alloc_type = alloctype; sfi->descTag.tagLocation = cpu_to_le32(*block); dfibh.soffset = dfibh.eoffset; dfibh.eoffset += (sfibh.eoffset - sfibh.soffset); dfi = (struct fileIdentDesc *)(dbh->b_data + dfibh.soffset); if (udf_write_fi(inode, sfi, dfi, &dfibh, sfi->impUse, - sfi->fileIdent + le16_to_cpu(sfi->lengthOfImpUse))) { - UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; + sfi->fileIdent + + le16_to_cpu(sfi->lengthOfImpUse))) { + iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; brelse(dbh); return NULL; } } mark_buffer_dirty_inode(dbh, inode); - memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0, UDF_I_LENALLOC(inode)); - UDF_I_LENALLOC(inode) = 0; + memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0, + iinfo->i_lenAlloc); + iinfo->i_lenAlloc = 0; eloc.logicalBlockNum = *block; - eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; - elen = inode->i_size; - UDF_I_LENEXTENTS(inode) = elen; + eloc.partitionReferenceNum = + iinfo->i_location.partitionReferenceNum; + elen = inode->i_sb->s_blocksize; + iinfo->i_lenExtents = elen; epos.bh = NULL; - epos.block = UDF_I_LOCATION(inode); + epos.block = iinfo->i_location; epos.offset = udf_file_entry_alloc_offset(inode); udf_add_aext(inode, &epos, eloc, elen, 0); /* UniqueID stuff */ @@ -296,7 +307,8 @@ static int udf_get_block(struct inode *inode, sector_t block, { int err, new; struct buffer_head *bh; - unsigned long phys; + sector_t phys = 0; + struct udf_inode_info *iinfo; if (!create) { phys = udf_block_map(inode, block); @@ -314,9 +326,10 @@ static int udf_get_block(struct inode *inode, sector_t block, if (block < 0) goto 
abort_negative; - if (block == UDF_I_NEXT_ALLOC_BLOCK(inode) + 1) { - UDF_I_NEXT_ALLOC_BLOCK(inode)++; - UDF_I_NEXT_ALLOC_GOAL(inode)++; + iinfo = UDF_I(inode); + if (block == iinfo->i_next_alloc_block + 1) { + iinfo->i_next_alloc_block++; + iinfo->i_next_alloc_goal++; } err = 0; @@ -366,32 +379,35 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block, /* Extend the file by 'blocks' blocks, return the number of extents added */ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, - kernel_long_ad * last_ext, sector_t blocks) + kernel_long_ad *last_ext, sector_t blocks) { sector_t add; int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); struct super_block *sb = inode->i_sb; kernel_lb_addr prealloc_loc = {}; int prealloc_len = 0; + struct udf_inode_info *iinfo; /* The previous extent is fake and we should not extend by anything * - there's nothing to do... */ if (!blocks && fake) return 0; + iinfo = UDF_I(inode); /* Round the last extent up to a multiple of block size */ if (last_ext->extLength & (sb->s_blocksize - 1)) { last_ext->extLength = (last_ext->extLength & UDF_EXTENT_FLAG_MASK) | (((last_ext->extLength & UDF_EXTENT_LENGTH_MASK) + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1)); - UDF_I_LENEXTENTS(inode) = - (UDF_I_LENEXTENTS(inode) + sb->s_blocksize - 1) & + iinfo->i_lenExtents = + (iinfo->i_lenExtents + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); } /* Last extent are just preallocated blocks? 
*/ - if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_ALLOCATED) { + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == + EXT_NOT_RECORDED_ALLOCATED) { /* Save the extent so that we can reattach it to the end */ prealloc_loc = last_ext->extLocation; prealloc_len = last_ext->extLength; @@ -399,13 +415,15 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLocation.logicalBlockNum = 0; - last_ext->extLocation.partitionReferenceNum = 0; + last_ext->extLocation.partitionReferenceNum = 0; } /* Can we merge with the previous extent? */ - if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { - add = ((1 << 30) - sb->s_blocksize - (last_ext->extLength & - UDF_EXTENT_LENGTH_MASK)) >> sb->s_blocksize_bits; + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == + EXT_NOT_RECORDED_NOT_ALLOCATED) { + add = ((1 << 30) - sb->s_blocksize - + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >> + sb->s_blocksize_bits; if (add > blocks) add = blocks; blocks -= add; @@ -416,9 +434,9 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, udf_add_aext(inode, last_pos, last_ext->extLocation, last_ext->extLength, 1); count++; - } else { - udf_write_aext(inode, last_pos, last_ext->extLocation, last_ext->extLength, 1); - } + } else + udf_write_aext(inode, last_pos, last_ext->extLocation, + last_ext->extLength, 1); /* Managed to do everything necessary? 
*/ if (!blocks) @@ -426,9 +444,10 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */ last_ext->extLocation.logicalBlockNum = 0; - last_ext->extLocation.partitionReferenceNum = 0; + last_ext->extLocation.partitionReferenceNum = 0; add = (1 << (30-sb->s_blocksize_bits)) - 1; - last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | (add << sb->s_blocksize_bits); + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | + (add << sb->s_blocksize_bits); /* Create enough extents to cover the whole hole */ while (blocks > add) { @@ -450,7 +469,8 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, out: /* Do we have some preallocated blocks saved? */ if (prealloc_len) { - if (udf_add_aext(inode, last_pos, prealloc_loc, prealloc_len, 1) == -1) + if (udf_add_aext(inode, last_pos, prealloc_loc, + prealloc_len, 1) == -1) return -1; last_ext->extLocation = prealloc_loc; last_ext->extLength = prealloc_len; @@ -458,9 +478,9 @@ out: } /* last_pos should point to the last written extent... 
*/ - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) last_pos->offset -= sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) last_pos->offset -= sizeof(long_ad); else return -1; @@ -469,7 +489,7 @@ out: } static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, - int *err, long *phys, int *new) + int *err, sector_t *phys, int *new) { static sector_t last_block; struct buffer_head *result = NULL; @@ -483,11 +503,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, uint32_t newblocknum, newblock; sector_t offset = 0; int8_t etype; - int goal = 0, pgoal = UDF_I_LOCATION(inode).logicalBlockNum; + struct udf_inode_info *iinfo = UDF_I(inode); + int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; prev_epos.offset = udf_file_entry_alloc_offset(inode); - prev_epos.block = UDF_I_LOCATION(inode); + prev_epos.block = iinfo->i_location; prev_epos.bh = NULL; cur_epos = next_epos = prev_epos; b_off = (loff_t)block << inode->i_sb->s_blocksize_bits; @@ -515,7 +536,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, prev_epos.offset = cur_epos.offset; cur_epos.offset = next_epos.offset; - if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1)) == -1) + etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1); + if (etype == -1) break; c = !c; @@ -569,9 +591,11 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, startnum = 1; } else { /* Create a fake extent when there's not one */ - memset(&laarr[0].extLocation, 0x00, sizeof(kernel_lb_addr)); + memset(&laarr[0].extLocation, 0x00, + sizeof(kernel_lb_addr)); laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; - /* Will udf_extend_file() create real extent from a fake one? */ + /* Will udf_extend_file() create real extent from + a fake one? 
*/ startnum = (offset > 0); } /* Create extents for the hole between EOF and offset */ @@ -589,14 +613,16 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, offset = 0; count += ret; /* We are not covered by a preallocated extent? */ - if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != EXT_NOT_RECORDED_ALLOCATED) { + if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != + EXT_NOT_RECORDED_ALLOCATED) { /* Is there any real extent? - otherwise we overwrite * the fake one... */ if (count) c = !c; laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | inode->i_sb->s_blocksize; - memset(&laarr[c].extLocation, 0x00, sizeof(kernel_lb_addr)); + memset(&laarr[c].extLocation, 0x00, + sizeof(kernel_lb_addr)); count++; endnum++; } @@ -605,7 +631,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, } else { endnum = startnum = ((count > 2) ? 2 : count); - /* if the current extent is in position 0, swap it with the previous */ + /* if the current extent is in position 0, + swap it with the previous */ if (!c && count != 1) { laarr[2] = laarr[0]; laarr[0] = laarr[1]; @@ -613,44 +640,47 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, c = 1; } - /* if the current block is located in an extent, read the next extent */ - if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0)) != -1) { + /* if the current block is located in an extent, + read the next extent */ + etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0); + if (etype != -1) { laarr[c + 1].extLength = (etype << 30) | elen; laarr[c + 1].extLocation = eloc; count++; startnum++; endnum++; - } else { + } else lastblock = 1; - } } /* if the current extent is not recorded but allocated, get the * block in the extent corresponding to the requested block */ - if ((laarr[c].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + if ((laarr[c].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) newblocknum = 
laarr[c].extLocation.logicalBlockNum + offset; - } else { /* otherwise, allocate a new block */ - if (UDF_I_NEXT_ALLOC_BLOCK(inode) == block) - goal = UDF_I_NEXT_ALLOC_GOAL(inode); + else { /* otherwise, allocate a new block */ + if (iinfo->i_next_alloc_block == block) + goal = iinfo->i_next_alloc_goal; if (!goal) { - if (!(goal = pgoal)) - goal = UDF_I_LOCATION(inode).logicalBlockNum + 1; + if (!(goal = pgoal)) /* XXX: what was intended here? */ + goal = iinfo->i_location.logicalBlockNum + 1; } - if (!(newblocknum = udf_new_block(inode->i_sb, inode, - UDF_I_LOCATION(inode).partitionReferenceNum, - goal, err))) { + newblocknum = udf_new_block(inode->i_sb, inode, + iinfo->i_location.partitionReferenceNum, + goal, err); + if (!newblocknum) { brelse(prev_epos.bh); *err = -ENOSPC; return NULL; } - UDF_I_LENEXTENTS(inode) += inode->i_sb->s_blocksize; + iinfo->i_lenExtents += inode->i_sb->s_blocksize; } - /* if the extent the requsted block is located in contains multiple blocks, - * split the extent into at most three extents. blocks prior to requested - * block, requested block, and blocks after requested block */ + /* if the extent the requsted block is located in contains multiple + * blocks, split the extent into at most three extents. 
blocks prior + * to requested block, requested block, and blocks after requested + * block */ udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); #ifdef UDF_PREALLOCATE @@ -668,15 +698,15 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, brelse(prev_epos.bh); - if (!(newblock = udf_get_pblock(inode->i_sb, newblocknum, - UDF_I_LOCATION(inode).partitionReferenceNum, 0))) { + newblock = udf_get_pblock(inode->i_sb, newblocknum, + iinfo->i_location.partitionReferenceNum, 0); + if (!newblock) return NULL; - } *phys = newblock; *err = 0; *new = 1; - UDF_I_NEXT_ALLOC_BLOCK(inode) = block; - UDF_I_NEXT_ALLOC_GOAL(inode) = newblocknum; + iinfo->i_next_alloc_block = block; + iinfo->i_next_alloc_goal = newblocknum; inode->i_ctime = current_fs_time(inode->i_sb); if (IS_SYNC(inode)) @@ -692,16 +722,20 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, kernel_long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) { + unsigned long blocksize = inode->i_sb->s_blocksize; + unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; + if ((laarr[*c].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30) || - (laarr[*c].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) { + (laarr[*c].extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) { int curr = *c; int blen = ((laarr[curr].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + blocksize - 1) >> blocksize_bits; int8_t etype = (laarr[curr].extLength >> 30); - if (blen == 1) { + if (blen == 1) ; - } else if (!offset || blen == offset + 1) { + else if (!offset || blen == offset + 1) { laarr[curr + 2] = laarr[curr + 1]; laarr[curr + 1] = laarr[curr]; } else { @@ -711,15 +745,18 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, if (offset) { if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { - udf_free_blocks(inode->i_sb, inode, laarr[curr].extLocation, 0, offset); - 
laarr[curr].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (offset << inode->i_sb->s_blocksize_bits); + udf_free_blocks(inode->i_sb, inode, + laarr[curr].extLocation, + 0, offset); + laarr[curr].extLength = + EXT_NOT_RECORDED_NOT_ALLOCATED | + (offset << blocksize_bits); laarr[curr].extLocation.logicalBlockNum = 0; - laarr[curr].extLocation.partitionReferenceNum = 0; - } else { + laarr[curr].extLocation. + partitionReferenceNum = 0; + } else laarr[curr].extLength = (etype << 30) | - (offset << inode->i_sb->s_blocksize_bits); - } + (offset << blocksize_bits); curr++; (*c)++; (*endnum)++; @@ -728,16 +765,17 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, laarr[curr].extLocation.logicalBlockNum = newblocknum; if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) laarr[curr].extLocation.partitionReferenceNum = - UDF_I_LOCATION(inode).partitionReferenceNum; + UDF_I(inode)->i_location.partitionReferenceNum; laarr[curr].extLength = EXT_RECORDED_ALLOCATED | - inode->i_sb->s_blocksize; + blocksize; curr++; if (blen != offset + 1) { if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - laarr[curr].extLocation.logicalBlockNum += (offset + 1); + laarr[curr].extLocation.logicalBlockNum += + offset + 1; laarr[curr].extLength = (etype << 30) | - ((blen - (offset + 1)) << inode->i_sb->s_blocksize_bits); + ((blen - (offset + 1)) << blocksize_bits); curr++; (*endnum)++; } @@ -756,69 +794,86 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, else start = c; } else { - if ((laarr[c + 1].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + if ((laarr[c + 1].extLength >> 30) == + (EXT_NOT_RECORDED_ALLOCATED >> 30)) { start = c + 1; - length = currlength = (((laarr[c + 1].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); - } else { + length = currlength = + (((laarr[c + 1].extLength & + UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> + 
inode->i_sb->s_blocksize_bits); + } else start = c; - } } for (i = start + 1; i <= *endnum; i++) { if (i == *endnum) { if (lastblock) length += UDF_DEFAULT_PREALLOC_BLOCKS; - } else if ((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) { - length += (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); - } else { + } else if ((laarr[i].extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) { + length += (((laarr[i].extLength & + UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits); + } else break; - } } if (length) { int next = laarr[start].extLocation.logicalBlockNum + (((laarr[start].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits); int numalloc = udf_prealloc_blocks(inode->i_sb, inode, - laarr[start].extLocation.partitionReferenceNum, - next, (UDF_DEFAULT_PREALLOC_BLOCKS > length ? length : - UDF_DEFAULT_PREALLOC_BLOCKS) - currlength); + laarr[start].extLocation.partitionReferenceNum, + next, (UDF_DEFAULT_PREALLOC_BLOCKS > length ? + length : UDF_DEFAULT_PREALLOC_BLOCKS) - + currlength); if (numalloc) { - if (start == (c + 1)) { + if (start == (c + 1)) laarr[start].extLength += - (numalloc << inode->i_sb->s_blocksize_bits); - } else { + (numalloc << + inode->i_sb->s_blocksize_bits); + else { memmove(&laarr[c + 2], &laarr[c + 1], sizeof(long_ad) * (*endnum - (c + 1))); (*endnum)++; laarr[c + 1].extLocation.logicalBlockNum = next; laarr[c + 1].extLocation.partitionReferenceNum = - laarr[c].extLocation.partitionReferenceNum; - laarr[c + 1].extLength = EXT_NOT_RECORDED_ALLOCATED | - (numalloc << inode->i_sb->s_blocksize_bits); + laarr[c].extLocation. 
+ partitionReferenceNum; + laarr[c + 1].extLength = + EXT_NOT_RECORDED_ALLOCATED | + (numalloc << + inode->i_sb->s_blocksize_bits); start = c + 1; } for (i = start + 1; numalloc && i < *endnum; i++) { - int elen = ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + int elen = ((laarr[i].extLength & + UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits; if (elen > numalloc) { laarr[i].extLength -= - (numalloc << inode->i_sb->s_blocksize_bits); + (numalloc << + inode->i_sb->s_blocksize_bits); numalloc = 0; } else { numalloc -= elen; if (*endnum > (i + 1)) - memmove(&laarr[i], &laarr[i + 1], - sizeof(long_ad) * (*endnum - (i + 1))); + memmove(&laarr[i], + &laarr[i + 1], + sizeof(long_ad) * + (*endnum - (i + 1))); i--; (*endnum)--; } } - UDF_I_LENEXTENTS(inode) += numalloc << inode->i_sb->s_blocksize_bits; + UDF_I(inode)->i_lenExtents += + numalloc << inode->i_sb->s_blocksize_bits; } } } @@ -828,70 +883,97 @@ static void udf_merge_extents(struct inode *inode, int *endnum) { int i; + unsigned long blocksize = inode->i_sb->s_blocksize; + unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; for (i = 0; i < (*endnum - 1); i++) { - if ((laarr[i].extLength >> 30) == (laarr[i + 1].extLength >> 30)) { - if (((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) || - ((laarr[i + 1].extLocation.logicalBlockNum - laarr[i].extLocation.logicalBlockNum) == - (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits))) { - if (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - (laarr[i + 1].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) { - laarr[i + 1].extLength = (laarr[i + 1].extLength - - (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - UDF_EXTENT_LENGTH_MASK) & ~(inode->i_sb->s_blocksize - 1); - laarr[i].extLength = (laarr[i].extLength & 
UDF_EXTENT_FLAG_MASK) + - (UDF_EXTENT_LENGTH_MASK + 1) - inode->i_sb->s_blocksize; - laarr[i + 1].extLocation.logicalBlockNum = - laarr[i].extLocation.logicalBlockNum + - ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) >> - inode->i_sb->s_blocksize_bits); - } else { - laarr[i].extLength = laarr[i + 1].extLength + - (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize - 1)); - if (*endnum > (i + 2)) - memmove(&laarr[i + 1], &laarr[i + 2], - sizeof(long_ad) * (*endnum - (i + 2))); - i--; - (*endnum)--; - } + kernel_long_ad *li /*l[i]*/ = &laarr[i]; + kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1]; + + if (((li->extLength >> 30) == (lip1->extLength >> 30)) && + (((li->extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) || + ((lip1->extLocation.logicalBlockNum - + li->extLocation.logicalBlockNum) == + (((li->extLength & UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) >> blocksize_bits)))) { + + if (((li->extLength & UDF_EXTENT_LENGTH_MASK) + + (lip1->extLength & UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) { + lip1->extLength = (lip1->extLength - + (li->extLength & + UDF_EXTENT_LENGTH_MASK) + + UDF_EXTENT_LENGTH_MASK) & + ~(blocksize - 1); + li->extLength = (li->extLength & + UDF_EXTENT_FLAG_MASK) + + (UDF_EXTENT_LENGTH_MASK + 1) - + blocksize; + lip1->extLocation.logicalBlockNum = + li->extLocation.logicalBlockNum + + ((li->extLength & + UDF_EXTENT_LENGTH_MASK) >> + blocksize_bits); + } else { + li->extLength = lip1->extLength + + (((li->extLength & + UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) & ~(blocksize - 1)); + if (*endnum > (i + 2)) + memmove(&laarr[i + 1], &laarr[i + 2], + sizeof(long_ad) * + (*endnum - (i + 2))); + i--; + (*endnum)--; } - } else if (((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) && - ((laarr[i + 1].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) { - udf_free_blocks(inode->i_sb, inode, laarr[i].extLocation, 0, - 
((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); - laarr[i].extLocation.logicalBlockNum = 0; - laarr[i].extLocation.partitionReferenceNum = 0; - - if (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - (laarr[i + 1].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) { - laarr[i + 1].extLength = (laarr[i + 1].extLength - - (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - UDF_EXTENT_LENGTH_MASK) & ~(inode->i_sb->s_blocksize - 1); - laarr[i].extLength = (laarr[i].extLength & UDF_EXTENT_FLAG_MASK) + - (UDF_EXTENT_LENGTH_MASK + 1) - inode->i_sb->s_blocksize; + } else if (((li->extLength >> 30) == + (EXT_NOT_RECORDED_ALLOCATED >> 30)) && + ((lip1->extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) { + udf_free_blocks(inode->i_sb, inode, li->extLocation, 0, + ((li->extLength & + UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) >> blocksize_bits); + li->extLocation.logicalBlockNum = 0; + li->extLocation.partitionReferenceNum = 0; + + if (((li->extLength & UDF_EXTENT_LENGTH_MASK) + + (lip1->extLength & UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) { + lip1->extLength = (lip1->extLength - + (li->extLength & + UDF_EXTENT_LENGTH_MASK) + + UDF_EXTENT_LENGTH_MASK) & + ~(blocksize - 1); + li->extLength = (li->extLength & + UDF_EXTENT_FLAG_MASK) + + (UDF_EXTENT_LENGTH_MASK + 1) - + blocksize; } else { - laarr[i].extLength = laarr[i + 1].extLength + - (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize - 1)); + li->extLength = lip1->extLength + + (((li->extLength & + UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) & ~(blocksize - 1)); if (*endnum > (i + 2)) memmove(&laarr[i + 1], &laarr[i + 2], - sizeof(long_ad) * (*endnum - (i + 2))); + sizeof(long_ad) * + (*endnum - (i + 2))); i--; (*endnum)--; } - } else if ((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { - 
udf_free_blocks(inode->i_sb, inode, laarr[i].extLocation, 0, - ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); - laarr[i].extLocation.logicalBlockNum = 0; - laarr[i].extLocation.partitionReferenceNum = 0; - laarr[i].extLength = (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) | - EXT_NOT_RECORDED_NOT_ALLOCATED; + } else if ((li->extLength >> 30) == + (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + udf_free_blocks(inode->i_sb, inode, + li->extLocation, 0, + ((li->extLength & + UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) >> blocksize_bits); + li->extLocation.logicalBlockNum = 0; + li->extLocation.partitionReferenceNum = 0; + li->extLength = (li->extLength & + UDF_EXTENT_LENGTH_MASK) | + EXT_NOT_RECORDED_NOT_ALLOCATED; } } } @@ -953,6 +1035,7 @@ void udf_truncate(struct inode *inode) { int offset; int err; + struct udf_inode_info *iinfo; if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) @@ -961,25 +1044,28 @@ void udf_truncate(struct inode *inode) return; lock_kernel(); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { - if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + - inode->i_size)) { + iinfo = UDF_I(inode); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + if (inode->i_sb->s_blocksize < + (udf_file_entry_alloc_offset(inode) + + inode->i_size)) { udf_expand_file_adinicb(inode, inode->i_size, &err); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { - inode->i_size = UDF_I_LENALLOC(inode); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + inode->i_size = iinfo->i_lenAlloc; unlock_kernel(); return; - } else { + } else udf_truncate_extents(inode); - } } else { offset = inode->i_size & (inode->i_sb->s_blocksize - 1); - memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, 0x00, - inode->i_sb->s_blocksize - offset - udf_file_entry_alloc_offset(inode)); - UDF_I_LENALLOC(inode) = inode->i_size; + memset(iinfo->i_ext.i_data + 
iinfo->i_lenEAttr + offset, + 0x00, inode->i_sb->s_blocksize - + offset - udf_file_entry_alloc_offset(inode)); + iinfo->i_lenAlloc = inode->i_size; } } else { - block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); + block_truncate_page(inode->i_mapping, inode->i_size, + udf_get_block); udf_truncate_extents(inode); } @@ -996,6 +1082,7 @@ static void __udf_read_inode(struct inode *inode) struct buffer_head *bh = NULL; struct fileEntry *fe; uint16_t ident; + struct udf_inode_info *iinfo = UDF_I(inode); /* * Set defaults, but the inode is still incomplete! @@ -1009,7 +1096,7 @@ static void __udf_read_inode(struct inode *inode) * i_nlink = 1 * i_op = NULL; */ - bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident); + bh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 0, &ident); if (!bh) { printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", inode->i_ino); @@ -1019,8 +1106,8 @@ static void __udf_read_inode(struct inode *inode) if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && ident != TAG_IDENT_USE) { - printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed ident=%d\n", - inode->i_ino, ident); + printk(KERN_ERR "udf: udf_read_inode(ino %ld) " + "failed ident=%d\n", inode->i_ino, ident); brelse(bh); make_bad_inode(inode); return; @@ -1028,11 +1115,12 @@ static void __udf_read_inode(struct inode *inode) fe = (struct fileEntry *)bh->b_data; - if (le16_to_cpu(fe->icbTag.strategyType) == 4096) { + if (fe->icbTag.strategyType == cpu_to_le16(4096)) { struct buffer_head *ibh = NULL, *nbh = NULL; struct indirectEntry *ie; - ibh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 1, &ident); + ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1, + &ident); if (ident == TAG_IDENT_IE) { if (ibh) { kernel_lb_addr loc; @@ -1041,10 +1129,12 @@ static void __udf_read_inode(struct inode *inode) loc = lelb_to_cpu(ie->indirectICB.extLocation); if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) { + 
(nbh = udf_read_ptagged(inode->i_sb, loc, 0, + &ident))) { if (ident == TAG_IDENT_FE || ident == TAG_IDENT_EFE) { - memcpy(&UDF_I_LOCATION(inode), &loc, + memcpy(&iinfo->i_location, + &loc, sizeof(kernel_lb_addr)); brelse(bh); brelse(ibh); @@ -1062,7 +1152,7 @@ static void __udf_read_inode(struct inode *inode) } else { brelse(ibh); } - } else if (le16_to_cpu(fe->icbTag.strategyType) != 4) { + } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { printk(KERN_ERR "udf: unsupported strategy type: %d\n", le16_to_cpu(fe->icbTag.strategyType)); brelse(bh); @@ -1081,51 +1171,63 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) time_t convtime; long convtime_usec; int offset; + struct udf_sb_info *sbi = UDF_SB(inode->i_sb); + struct udf_inode_info *iinfo = UDF_I(inode); fe = (struct fileEntry *)bh->b_data; efe = (struct extendedFileEntry *)bh->b_data; - if (le16_to_cpu(fe->icbTag.strategyType) == 4) - UDF_I_STRAT4096(inode) = 0; - else /* if (le16_to_cpu(fe->icbTag.strategyType) == 4096) */ - UDF_I_STRAT4096(inode) = 1; - - UDF_I_ALLOCTYPE(inode) = le16_to_cpu(fe->icbTag.flags) & ICBTAG_FLAG_AD_MASK; - UDF_I_UNIQUE(inode) = 0; - UDF_I_LENEATTR(inode) = 0; - UDF_I_LENEXTENTS(inode) = 0; - UDF_I_LENALLOC(inode) = 0; - UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; - UDF_I_NEXT_ALLOC_GOAL(inode) = 0; - if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_EFE) { - UDF_I_EFE(inode) = 1; - UDF_I_USE(inode) = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry))) { + if (fe->icbTag.strategyType == cpu_to_le16(4)) + iinfo->i_strat4096 = 0; + else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */ + iinfo->i_strat4096 = 1; + + iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) & + ICBTAG_FLAG_AD_MASK; + iinfo->i_unique = 0; + iinfo->i_lenEAttr = 0; + iinfo->i_lenExtents = 0; + iinfo->i_lenAlloc = 0; + iinfo->i_next_alloc_block = 0; + iinfo->i_next_alloc_goal = 0; + if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { + 
iinfo->i_efe = 1; + iinfo->i_use = 0; + if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry))) { make_bad_inode(inode); return; } - memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), - inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); - } else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) { - UDF_I_EFE(inode) = 0; - UDF_I_USE(inode) = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct fileEntry))) { + memcpy(iinfo->i_ext.i_data, + bh->b_data + sizeof(struct extendedFileEntry), + inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry)); + } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { + iinfo->i_efe = 0; + iinfo->i_use = 0; + if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct fileEntry))) { make_bad_inode(inode); return; } - memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), + memcpy(iinfo->i_ext.i_data, + bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); - } else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) { - UDF_I_EFE(inode) = 0; - UDF_I_USE(inode) = 1; - UDF_I_LENALLOC(inode) = - le32_to_cpu(((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry))) { + } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { + iinfo->i_efe = 0; + iinfo->i_use = 1; + iinfo->i_lenAlloc = le32_to_cpu( + ((struct unallocSpaceEntry *)bh->b_data)-> + lengthAllocDescs); + if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry))) { make_bad_inode(inode); return; } - memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), - inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); + memcpy(iinfo->i_ext.i_data, + bh->b_data + sizeof(struct unallocSpaceEntry), + inode->i_sb->s_blocksize - + sizeof(struct 
unallocSpaceEntry)); return; } @@ -1146,12 +1248,12 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_nlink = 1; inode->i_size = le64_to_cpu(fe->informationLength); - UDF_I_LENEXTENTS(inode) = inode->i_size; + iinfo->i_lenExtents = inode->i_size; inode->i_mode = udf_convert_permissions(fe); inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask; - if (UDF_I_EFE(inode) == 0) { + if (iinfo->i_efe == 0) { inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); @@ -1160,7 +1262,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_atime.tv_sec = convtime; inode->i_atime.tv_nsec = convtime_usec * 1000; } else { - inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb); + inode->i_atime = sbi->s_record_time; } if (udf_stamp_to_time(&convtime, &convtime_usec, @@ -1168,7 +1270,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_mtime.tv_sec = convtime; inode->i_mtime.tv_nsec = convtime_usec * 1000; } else { - inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb); + inode->i_mtime = sbi->s_record_time; } if (udf_stamp_to_time(&convtime, &convtime_usec, @@ -1176,13 +1278,13 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_ctime.tv_sec = convtime; inode->i_ctime.tv_nsec = convtime_usec * 1000; } else { - inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb); + inode->i_ctime = sbi->s_record_time; } - UDF_I_UNIQUE(inode) = le64_to_cpu(fe->uniqueID); - UDF_I_LENEATTR(inode) = le32_to_cpu(fe->lengthExtendedAttr); - UDF_I_LENALLOC(inode) = le32_to_cpu(fe->lengthAllocDescs); - offset = sizeof(struct fileEntry) + UDF_I_LENEATTR(inode); + iinfo->i_unique = le64_to_cpu(fe->uniqueID); + iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); + iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); + offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr; } else { inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << 
(inode->i_sb->s_blocksize_bits - 9); @@ -1192,7 +1294,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_atime.tv_sec = convtime; inode->i_atime.tv_nsec = convtime_usec * 1000; } else { - inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb); + inode->i_atime = sbi->s_record_time; } if (udf_stamp_to_time(&convtime, &convtime_usec, @@ -1200,15 +1302,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_mtime.tv_sec = convtime; inode->i_mtime.tv_nsec = convtime_usec * 1000; } else { - inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb); + inode->i_mtime = sbi->s_record_time; } if (udf_stamp_to_time(&convtime, &convtime_usec, lets_to_cpu(efe->createTime))) { - UDF_I_CRTIME(inode).tv_sec = convtime; - UDF_I_CRTIME(inode).tv_nsec = convtime_usec * 1000; + iinfo->i_crtime.tv_sec = convtime; + iinfo->i_crtime.tv_nsec = convtime_usec * 1000; } else { - UDF_I_CRTIME(inode) = UDF_SB_RECORDTIME(inode->i_sb); + iinfo->i_crtime = sbi->s_record_time; } if (udf_stamp_to_time(&convtime, &convtime_usec, @@ -1216,13 +1318,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_ctime.tv_sec = convtime; inode->i_ctime.tv_nsec = convtime_usec * 1000; } else { - inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb); + inode->i_ctime = sbi->s_record_time; } - UDF_I_UNIQUE(inode) = le64_to_cpu(efe->uniqueID); - UDF_I_LENEATTR(inode) = le32_to_cpu(efe->lengthExtendedAttr); - UDF_I_LENALLOC(inode) = le32_to_cpu(efe->lengthAllocDescs); - offset = sizeof(struct extendedFileEntry) + UDF_I_LENEATTR(inode); + iinfo->i_unique = le64_to_cpu(efe->uniqueID); + iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); + iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); + offset = sizeof(struct extendedFileEntry) + + iinfo->i_lenEAttr; } switch (fe->icbTag.fileType) { @@ -1235,7 +1338,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) case ICBTAG_FILE_TYPE_REALTIME: case 
ICBTAG_FILE_TYPE_REGULAR: case ICBTAG_FILE_TYPE_UNDEF: - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) inode->i_data.a_ops = &udf_adinicb_aops; else inode->i_data.a_ops = &udf_aops; @@ -1261,31 +1364,33 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_mode = S_IFLNK | S_IRWXUGO; break; default: - printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown file type=%d\n", - inode->i_ino, fe->icbTag.fileType); + printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " + "file type=%d\n", inode->i_ino, + fe->icbTag.fileType); make_bad_inode(inode); return; } if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - struct deviceSpec *dsea = (struct deviceSpec *)udf_get_extendedattr(inode, 12, 1); + struct deviceSpec *dsea = + (struct deviceSpec *)udf_get_extendedattr(inode, 12, 1); if (dsea) { init_special_inode(inode, inode->i_mode, - MKDEV(le32_to_cpu(dsea->majorDeviceIdent), - le32_to_cpu(dsea->minorDeviceIdent))); + MKDEV(le32_to_cpu(dsea->majorDeviceIdent), + le32_to_cpu(dsea->minorDeviceIdent))); /* Developer ID ??? */ - } else { + } else make_bad_inode(inode); - } } } static int udf_alloc_i_data(struct inode *inode, size_t size) { - UDF_I_DATA(inode) = kmalloc(size, GFP_KERNEL); + struct udf_inode_info *iinfo = UDF_I(inode); + iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL); - if (!UDF_I_DATA(inode)) { - printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) no free memory\n", - inode->i_ino); + if (!iinfo->i_ext.i_data) { + printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) " + "no free memory\n", inode->i_ino); return -ENOMEM; } @@ -1301,12 +1406,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) permissions = le32_to_cpu(fe->permissions); flags = le16_to_cpu(fe->icbTag.flags); - mode = (( permissions ) & S_IRWXO) | - (( permissions >> 2 ) & S_IRWXG) | - (( permissions >> 4 ) & S_IRWXU) | - (( flags & ICBTAG_FLAG_SETUID) ? 
S_ISUID : 0) | - (( flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | - (( flags & ICBTAG_FLAG_STICKY) ? S_ISVTX : 0); + mode = ((permissions) & S_IRWXO) | + ((permissions >> 2) & S_IRWXG) | + ((permissions >> 4) & S_IRWXU) | + ((flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) | + ((flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | + ((flags & ICBTAG_FLAG_STICKY) ? S_ISVTX : 0); return mode; } @@ -1350,11 +1455,15 @@ static int udf_update_inode(struct inode *inode, int do_sync) uint32_t udfperms; uint16_t icbflags; uint16_t crclen; - int i; kernel_timestamp cpu_time; int err = 0; + struct udf_sb_info *sbi = UDF_SB(inode->i_sb); + unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; + struct udf_inode_info *iinfo = UDF_I(inode); - bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), 0)); + bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, + iinfo->i_location, 0)); if (!bh) { udf_debug("bread failure\n"); return -EIO; @@ -1365,23 +1474,24 @@ static int udf_update_inode(struct inode *inode, int do_sync) fe = (struct fileEntry *)bh->b_data; efe = (struct extendedFileEntry *)bh->b_data; - if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) { + if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { struct unallocSpaceEntry *use = (struct unallocSpaceEntry *)bh->b_data; - use->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); - memcpy(bh->b_data + sizeof(struct unallocSpaceEntry), UDF_I_DATA(inode), - inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); - crclen = sizeof(struct unallocSpaceEntry) + UDF_I_LENALLOC(inode) - sizeof(tag); - use->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); + use->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); + memcpy(bh->b_data + sizeof(struct unallocSpaceEntry), + iinfo->i_ext.i_data, inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry)); + crclen = sizeof(struct unallocSpaceEntry) + + iinfo->i_lenAlloc - sizeof(tag); + use->descTag.tagLocation 
= cpu_to_le32( + iinfo->i_location. + logicalBlockNum); use->descTag.descCRCLength = cpu_to_le16(crclen); - use->descTag.descCRC = cpu_to_le16(udf_crc((char *)use + sizeof(tag), crclen, 0)); - - use->descTag.tagChecksum = 0; - for (i = 0; i < 16; i++) { - if (i != 4) - use->descTag.tagChecksum += ((uint8_t *)&(use->descTag))[i]; - } + use->descTag.descCRC = cpu_to_le16(udf_crc((char *)use + + sizeof(tag), crclen, + 0)); + use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); mark_buffer_dirty(bh); brelse(bh); @@ -1398,14 +1508,14 @@ static int udf_update_inode(struct inode *inode, int do_sync) else fe->gid = cpu_to_le32(inode->i_gid); - udfperms = ((inode->i_mode & S_IRWXO) ) | - ((inode->i_mode & S_IRWXG) << 2) | - ((inode->i_mode & S_IRWXU) << 4); + udfperms = ((inode->i_mode & S_IRWXO)) | + ((inode->i_mode & S_IRWXG) << 2) | + ((inode->i_mode & S_IRWXU) << 4); - udfperms |= (le32_to_cpu(fe->permissions) & - (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | - FE_PERM_G_DELETE | FE_PERM_G_CHATTR | - FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); + udfperms |= (le32_to_cpu(fe->permissions) & + (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | + FE_PERM_G_DELETE | FE_PERM_G_CHATTR | + FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); fe->permissions = cpu_to_le32(udfperms); if (S_ISDIR(inode->i_mode)) @@ -1426,8 +1536,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) sizeof(regid), 12, 0x3); dsea->attrType = cpu_to_le32(12); dsea->attrSubtype = 1; - dsea->attrLength = cpu_to_le32(sizeof(struct deviceSpec) + - sizeof(regid)); + dsea->attrLength = cpu_to_le32( + sizeof(struct deviceSpec) + + sizeof(regid)); dsea->impUseLength = cpu_to_le32(sizeof(regid)); } eid = (regid *)dsea->impUse; @@ -1439,12 +1550,13 @@ static int udf_update_inode(struct inode *inode, int do_sync) dsea->minorDeviceIdent = cpu_to_le32(iminor(inode)); } - if (UDF_I_EFE(inode) == 0) { - memcpy(bh->b_data + sizeof(struct fileEntry), UDF_I_DATA(inode), + if (iinfo->i_efe == 0) { + memcpy(bh->b_data + sizeof(struct 
fileEntry), + iinfo->i_ext.i_data, inode->i_sb->s_blocksize - sizeof(struct fileEntry)); fe->logicalBlocksRecorded = cpu_to_le64( - (inode->i_blocks + (1 << (inode->i_sb->s_blocksize_bits - 9)) - 1) >> - (inode->i_sb->s_blocksize_bits - 9)); + (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> + (blocksize_bits - 9)); if (udf_time_to_stamp(&cpu_time, inode->i_atime)) fe->accessTime = cpu_to_lets(cpu_time); @@ -1456,40 +1568,41 @@ static int udf_update_inode(struct inode *inode, int do_sync) strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; - fe->uniqueID = cpu_to_le64(UDF_I_UNIQUE(inode)); - fe->lengthExtendedAttr = cpu_to_le32(UDF_I_LENEATTR(inode)); - fe->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); + fe->uniqueID = cpu_to_le64(iinfo->i_unique); + fe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); + fe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE); crclen = sizeof(struct fileEntry); } else { - memcpy(bh->b_data + sizeof(struct extendedFileEntry), UDF_I_DATA(inode), - inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); + memcpy(bh->b_data + sizeof(struct extendedFileEntry), + iinfo->i_ext.i_data, + inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry)); efe->objectSize = cpu_to_le64(inode->i_size); efe->logicalBlocksRecorded = cpu_to_le64( - (inode->i_blocks + (1 << (inode->i_sb->s_blocksize_bits - 9)) - 1) >> - (inode->i_sb->s_blocksize_bits - 9)); + (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> + (blocksize_bits - 9)); - if (UDF_I_CRTIME(inode).tv_sec > inode->i_atime.tv_sec || - (UDF_I_CRTIME(inode).tv_sec == inode->i_atime.tv_sec && - UDF_I_CRTIME(inode).tv_nsec > inode->i_atime.tv_nsec)) { - UDF_I_CRTIME(inode) = inode->i_atime; - } - if (UDF_I_CRTIME(inode).tv_sec > inode->i_mtime.tv_sec || - (UDF_I_CRTIME(inode).tv_sec == inode->i_mtime.tv_sec && - 
UDF_I_CRTIME(inode).tv_nsec > inode->i_mtime.tv_nsec)) { - UDF_I_CRTIME(inode) = inode->i_mtime; - } - if (UDF_I_CRTIME(inode).tv_sec > inode->i_ctime.tv_sec || - (UDF_I_CRTIME(inode).tv_sec == inode->i_ctime.tv_sec && - UDF_I_CRTIME(inode).tv_nsec > inode->i_ctime.tv_nsec)) { - UDF_I_CRTIME(inode) = inode->i_ctime; - } + if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec || + (iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec && + iinfo->i_crtime.tv_nsec > inode->i_atime.tv_nsec)) + iinfo->i_crtime = inode->i_atime; + + if (iinfo->i_crtime.tv_sec > inode->i_mtime.tv_sec || + (iinfo->i_crtime.tv_sec == inode->i_mtime.tv_sec && + iinfo->i_crtime.tv_nsec > inode->i_mtime.tv_nsec)) + iinfo->i_crtime = inode->i_mtime; + + if (iinfo->i_crtime.tv_sec > inode->i_ctime.tv_sec || + (iinfo->i_crtime.tv_sec == inode->i_ctime.tv_sec && + iinfo->i_crtime.tv_nsec > inode->i_ctime.tv_nsec)) + iinfo->i_crtime = inode->i_ctime; if (udf_time_to_stamp(&cpu_time, inode->i_atime)) efe->accessTime = cpu_to_lets(cpu_time); if (udf_time_to_stamp(&cpu_time, inode->i_mtime)) efe->modificationTime = cpu_to_lets(cpu_time); - if (udf_time_to_stamp(&cpu_time, UDF_I_CRTIME(inode))) + if (udf_time_to_stamp(&cpu_time, iinfo->i_crtime)) efe->createTime = cpu_to_lets(cpu_time); if (udf_time_to_stamp(&cpu_time, inode->i_ctime)) efe->attrTime = cpu_to_lets(cpu_time); @@ -1498,13 +1611,13 @@ static int udf_update_inode(struct inode *inode, int do_sync) strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; - efe->uniqueID = cpu_to_le64(UDF_I_UNIQUE(inode)); - efe->lengthExtendedAttr = cpu_to_le32(UDF_I_LENEATTR(inode)); - efe->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); + efe->uniqueID = cpu_to_le64(iinfo->i_unique); + efe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); + efe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE); crclen = 
sizeof(struct extendedFileEntry); } - if (UDF_I_STRAT4096(inode)) { + if (iinfo->i_strat4096) { fe->icbTag.strategyType = cpu_to_le16(4096); fe->icbTag.strategyParameter = cpu_to_le16(1); fe->icbTag.numEntries = cpu_to_le16(2); @@ -1528,7 +1641,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) else if (S_ISSOCK(inode->i_mode)) fe->icbTag.fileType = ICBTAG_FILE_TYPE_SOCKET; - icbflags = UDF_I_ALLOCTYPE(inode) | + icbflags = iinfo->i_alloc_type | ((inode->i_mode & S_ISUID) ? ICBTAG_FLAG_SETUID : 0) | ((inode->i_mode & S_ISGID) ? ICBTAG_FLAG_SETGID : 0) | ((inode->i_mode & S_ISVTX) ? ICBTAG_FLAG_STICKY : 0) | @@ -1537,29 +1650,28 @@ static int udf_update_inode(struct inode *inode, int do_sync) ICBTAG_FLAG_SETGID | ICBTAG_FLAG_STICKY)); fe->icbTag.flags = cpu_to_le16(icbflags); - if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) + if (sbi->s_udfrev >= 0x0200) fe->descTag.descVersion = cpu_to_le16(3); else fe->descTag.descVersion = cpu_to_le16(2); - fe->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb)); - fe->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); - crclen += UDF_I_LENEATTR(inode) + UDF_I_LENALLOC(inode) - sizeof(tag); + fe->descTag.tagSerialNum = cpu_to_le16(sbi->s_serial_number); + fe->descTag.tagLocation = cpu_to_le32( + iinfo->i_location.logicalBlockNum); + crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - + sizeof(tag); fe->descTag.descCRCLength = cpu_to_le16(crclen); - fe->descTag.descCRC = cpu_to_le16(udf_crc((char *)fe + sizeof(tag), crclen, 0)); - - fe->descTag.tagChecksum = 0; - for (i = 0; i < 16; i++) { - if (i != 4) - fe->descTag.tagChecksum += ((uint8_t *)&(fe->descTag))[i]; - } + fe->descTag.descCRC = cpu_to_le16(udf_crc((char *)fe + sizeof(tag), + crclen, 0)); + fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); /* write the data blocks */ mark_buffer_dirty(bh); if (do_sync) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk("IO error syncing udf inode 
[%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); + printk(KERN_WARNING "IO error syncing udf inode " + "[%s:%08lx]\n", inode->i_sb->s_id, + inode->i_ino); err = -EIO; } } @@ -1577,7 +1689,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) return NULL; if (inode->i_state & I_NEW) { - memcpy(&UDF_I_LOCATION(inode), &ino, sizeof(kernel_lb_addr)); + memcpy(&UDF_I(inode)->i_location, &ino, sizeof(kernel_lb_addr)); __udf_read_inode(inode); unlock_new_inode(inode); } @@ -1585,7 +1697,8 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) if (is_bad_inode(inode)) goto out_iput; - if (ino.logicalBlockNum >= UDF_SB_PARTLEN(sb, ino.partitionReferenceNum)) { + if (ino.logicalBlockNum >= UDF_SB(sb)-> + s_partmaps[ino.partitionReferenceNum].s_partition_len) { udf_debug("block=%d, partition=%d out of range\n", ino.logicalBlockNum, ino.partitionReferenceNum); make_bad_inode(inode); @@ -1599,7 +1712,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) return NULL; } -int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, +int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, kernel_lb_addr eloc, uint32_t elen, int inc) { int adsize; @@ -1608,15 +1721,18 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, struct allocExtDesc *aed; int8_t etype; uint8_t *ptr; + struct udf_inode_info *iinfo = UDF_I(inode); if (!epos->bh) - ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + ptr = iinfo->i_ext.i_data + epos->offset - + udf_file_entry_alloc_offset(inode) + + iinfo->i_lenEAttr; else ptr = epos->bh->b_data + epos->offset; - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else return -1; @@ -1627,15 
+1743,16 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, int err, loffset; kernel_lb_addr obloc = epos->block; - if (!(epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, - obloc.partitionReferenceNum, - obloc.logicalBlockNum, &err))) { + epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, + obloc.partitionReferenceNum, + obloc.logicalBlockNum, &err); + if (!epos->block.logicalBlockNum) return -1; - } - if (!(nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, - epos->block, 0)))) { + nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, + epos->block, + 0)); + if (!nbh) return -1; - } lock_buffer(nbh); memset(nbh->b_data, 0x00, inode->i_sb->s_blocksize); set_buffer_uptodate(nbh); @@ -1644,7 +1761,8 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, aed = (struct allocExtDesc *)(nbh->b_data); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) - aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); + aed->previousAllocExtLocation = + cpu_to_le32(obloc.logicalBlockNum); if (epos->offset + adsize > inode->i_sb->s_blocksize) { loffset = epos->offset; aed->lengthAllocDescs = cpu_to_le32(adsize); @@ -1661,24 +1779,26 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, if (epos->bh) { aed = (struct allocExtDesc *)epos->bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + cpu_to_le32(le32_to_cpu( + aed->lengthAllocDescs) + adsize); } else { - UDF_I_LENALLOC(inode) += adsize; + iinfo->i_lenAlloc += adsize; mark_inode_dirty(inode); } } - if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) + if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200) udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, epos->block.logicalBlockNum, sizeof(tag)); else udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, epos->block.logicalBlockNum, sizeof(tag)); - switch (UDF_I_ALLOCTYPE(inode)) { + switch (iinfo->i_alloc_type) { case ICBTAG_FLAG_AD_SHORT: sad 
= (short_ad *)sptr; sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | inode->i_sb->s_blocksize); - sad->extPosition = cpu_to_le32(epos->block.logicalBlockNum); + sad->extPosition = + cpu_to_le32(epos->block.logicalBlockNum); break; case ICBTAG_FLAG_AD_LONG: lad = (long_ad *)sptr; @@ -1690,10 +1810,11 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, } if (epos->bh) { if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || - UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) udf_update_tag(epos->bh->b_data, loffset); else - udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(epos->bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(epos->bh, inode); brelse(epos->bh); } else { @@ -1705,36 +1826,43 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, etype = udf_write_aext(inode, epos, eloc, elen, inc); if (!epos->bh) { - UDF_I_LENALLOC(inode) += adsize; + iinfo->i_lenAlloc += adsize; mark_inode_dirty(inode); } else { aed = (struct allocExtDesc *)epos->bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(epos->bh->b_data, epos->offset + (inc ? 0 : adsize)); + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + + adsize); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || + UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) + udf_update_tag(epos->bh->b_data, + epos->offset + (inc ? 
0 : adsize)); else - udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(epos->bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(epos->bh, inode); } return etype; } -int8_t udf_write_aext(struct inode * inode, struct extent_position * epos, +int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, kernel_lb_addr eloc, uint32_t elen, int inc) { int adsize; uint8_t *ptr; short_ad *sad; long_ad *lad; + struct udf_inode_info *iinfo = UDF_I(inode); if (!epos->bh) - ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + ptr = iinfo->i_ext.i_data + epos->offset - + udf_file_entry_alloc_offset(inode) + + iinfo->i_lenEAttr; else ptr = epos->bh->b_data + epos->offset; - switch (UDF_I_ALLOCTYPE(inode)) { + switch (iinfo->i_alloc_type) { case ICBTAG_FLAG_AD_SHORT: sad = (short_ad *)ptr; sad->extLength = cpu_to_le32(elen); @@ -1754,10 +1882,12 @@ int8_t udf_write_aext(struct inode * inode, struct extent_position * epos, if (epos->bh) { if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || - UDF_SB_UDFREV(inode->i_sb) >= 0x0201) { - struct allocExtDesc *aed = (struct allocExtDesc *)epos->bh->b_data; + UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) { + struct allocExtDesc *aed = + (struct allocExtDesc *)epos->bh->b_data; udf_update_tag(epos->bh->b_data, - le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc)); + le32_to_cpu(aed->lengthAllocDescs) + + sizeof(struct allocExtDesc)); } mark_buffer_dirty_inode(epos->bh, inode); } else { @@ -1770,19 +1900,21 @@ int8_t udf_write_aext(struct inode * inode, struct extent_position * epos, return (elen >> 30); } -int8_t udf_next_aext(struct inode * inode, struct extent_position * epos, - kernel_lb_addr * eloc, uint32_t * elen, int inc) +int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, + kernel_lb_addr *eloc, uint32_t *elen, int inc) { int8_t etype; while ((etype = udf_current_aext(inode, epos, eloc, elen, 
inc)) == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { + int block; epos->block = *eloc; epos->offset = sizeof(struct allocExtDesc); brelse(epos->bh); - if (!(epos->bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, epos->block, 0)))) { - udf_debug("reading block %d failed!\n", - udf_get_lb_pblock(inode->i_sb, epos->block, 0)); + block = udf_get_lb_pblock(inode->i_sb, epos->block, 0); + epos->bh = udf_tread(inode->i_sb, block); + if (!epos->bh) { + udf_debug("reading block %d failed!\n", block); return -1; } } @@ -1790,47 +1922,55 @@ int8_t udf_next_aext(struct inode * inode, struct extent_position * epos, return etype; } -int8_t udf_current_aext(struct inode * inode, struct extent_position * epos, - kernel_lb_addr * eloc, uint32_t * elen, int inc) +int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, + kernel_lb_addr *eloc, uint32_t *elen, int inc) { int alen; int8_t etype; uint8_t *ptr; short_ad *sad; long_ad *lad; - + struct udf_inode_info *iinfo = UDF_I(inode); if (!epos->bh) { if (!epos->offset) epos->offset = udf_file_entry_alloc_offset(inode); - ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); - alen = udf_file_entry_alloc_offset(inode) + UDF_I_LENALLOC(inode); + ptr = iinfo->i_ext.i_data + epos->offset - + udf_file_entry_alloc_offset(inode) + + iinfo->i_lenEAttr; + alen = udf_file_entry_alloc_offset(inode) + + iinfo->i_lenAlloc; } else { if (!epos->offset) epos->offset = sizeof(struct allocExtDesc); ptr = epos->bh->b_data + epos->offset; alen = sizeof(struct allocExtDesc) + - le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)->lengthAllocDescs); + le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)-> + lengthAllocDescs); } - switch (UDF_I_ALLOCTYPE(inode)) { + switch (iinfo->i_alloc_type) { case ICBTAG_FLAG_AD_SHORT: - if (!(sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc))) + sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc); + if (!sad) return -1; etype = 
le32_to_cpu(sad->extLength) >> 30; eloc->logicalBlockNum = le32_to_cpu(sad->extPosition); - eloc->partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; + eloc->partitionReferenceNum = + iinfo->i_location.partitionReferenceNum; *elen = le32_to_cpu(sad->extLength) & UDF_EXTENT_LENGTH_MASK; break; case ICBTAG_FLAG_AD_LONG: - if (!(lad = udf_get_filelongad(ptr, alen, &epos->offset, inc))) + lad = udf_get_filelongad(ptr, alen, &epos->offset, inc); + if (!lad) return -1; etype = le32_to_cpu(lad->extLength) >> 30; *eloc = lelb_to_cpu(lad->extLocation); *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; break; default: - udf_debug("alloc_type = %d unsupported\n", UDF_I_ALLOCTYPE(inode)); + udf_debug("alloc_type = %d unsupported\n", + iinfo->i_alloc_type); return -1; } @@ -1858,22 +1998,24 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, return (nelen >> 30); } -int8_t udf_delete_aext(struct inode * inode, struct extent_position epos, +int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, kernel_lb_addr eloc, uint32_t elen) { struct extent_position oepos; int adsize; int8_t etype; struct allocExtDesc *aed; + struct udf_inode_info *iinfo; if (epos.bh) { get_bh(epos.bh); get_bh(epos.bh); } - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + iinfo = UDF_I(inode); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else adsize = 0; @@ -1900,33 +2042,39 @@ int8_t udf_delete_aext(struct inode * inode, struct extent_position epos, udf_write_aext(inode, &oepos, eloc, elen, 1); udf_write_aext(inode, &oepos, eloc, elen, 1); if (!oepos.bh) { - UDF_I_LENALLOC(inode) -= (adsize * 2); + iinfo->i_lenAlloc -= (adsize * 2); mark_inode_dirty(inode); } else { aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = - 
cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2 * adsize)); + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - + (2 * adsize)); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || - UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(oepos.bh->b_data, oepos.offset - (2 * adsize)); + UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) + udf_update_tag(oepos.bh->b_data, + oepos.offset - (2 * adsize)); else - udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(oepos.bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(oepos.bh, inode); } } else { udf_write_aext(inode, &oepos, eloc, elen, 1); if (!oepos.bh) { - UDF_I_LENALLOC(inode) -= adsize; + iinfo->i_lenAlloc -= adsize; mark_inode_dirty(inode); } else { aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize); + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - + adsize); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || - UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(oepos.bh->b_data, epos.offset - adsize); + UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) + udf_update_tag(oepos.bh->b_data, + epos.offset - adsize); else - udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(oepos.bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(oepos.bh, inode); } } @@ -1937,34 +2085,38 @@ int8_t udf_delete_aext(struct inode * inode, struct extent_position epos, return (elen >> 30); } -int8_t inode_bmap(struct inode * inode, sector_t block, - struct extent_position * pos, kernel_lb_addr * eloc, - uint32_t * elen, sector_t * offset) +int8_t inode_bmap(struct inode *inode, sector_t block, + struct extent_position *pos, kernel_lb_addr *eloc, + uint32_t *elen, sector_t *offset) { + unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; loff_t lbcount = 0, bcount = - (loff_t) block << inode->i_sb->s_blocksize_bits; + (loff_t) block << blocksize_bits; int8_t etype; + 
struct udf_inode_info *iinfo; if (block < 0) { printk(KERN_ERR "udf: inode_bmap: block < 0\n"); return -1; } + iinfo = UDF_I(inode); pos->offset = 0; - pos->block = UDF_I_LOCATION(inode); + pos->block = iinfo->i_location; pos->bh = NULL; *elen = 0; do { - if ((etype = udf_next_aext(inode, pos, eloc, elen, 1)) == -1) { - *offset = (bcount - lbcount) >> inode->i_sb->s_blocksize_bits; - UDF_I_LENEXTENTS(inode) = lbcount; + etype = udf_next_aext(inode, pos, eloc, elen, 1); + if (etype == -1) { + *offset = (bcount - lbcount) >> blocksize_bits; + iinfo->i_lenExtents = lbcount; return -1; } lbcount += *elen; } while (lbcount <= bcount); - *offset = (bcount + *elen - lbcount) >> inode->i_sb->s_blocksize_bits; + *offset = (bcount + *elen - lbcount) >> blocksize_bits; return etype; } @@ -1979,7 +2131,8 @@ long udf_block_map(struct inode *inode, sector_t block) lock_kernel(); - if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) + if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == + (EXT_RECORDED_ALLOCATED >> 30)) ret = udf_get_lb_pblock(inode->i_sb, eloc, offset); else ret = 0; diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 15297deb5051..a1d6da0caf71 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -51,18 +51,18 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size, uint8_t *ea = NULL, *ad = NULL; int offset; uint16_t crclen; - int i; + struct udf_inode_info *iinfo = UDF_I(inode); - ea = UDF_I_DATA(inode); - if (UDF_I_LENEATTR(inode)) { - ad = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); + ea = iinfo->i_ext.i_data; + if (iinfo->i_lenEAttr) { + ad = iinfo->i_ext.i_data + iinfo->i_lenEAttr; } else { ad = ea; size += sizeof(struct extendedAttrHeaderDesc); } offset = inode->i_sb->s_blocksize - udf_file_entry_alloc_offset(inode) - - UDF_I_LENALLOC(inode); + iinfo->i_lenAlloc; /* TODO - Check for FreeEASpace */ @@ -70,69 +70,80 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, 
uint32_t size, struct extendedAttrHeaderDesc *eahd; eahd = (struct extendedAttrHeaderDesc *)ea; - if (UDF_I_LENALLOC(inode)) { - memmove(&ad[size], ad, UDF_I_LENALLOC(inode)); - } + if (iinfo->i_lenAlloc) + memmove(&ad[size], ad, iinfo->i_lenAlloc); - if (UDF_I_LENEATTR(inode)) { + if (iinfo->i_lenEAttr) { /* check checksum/crc */ - if (le16_to_cpu(eahd->descTag.tagIdent) != TAG_IDENT_EAHD || - le32_to_cpu(eahd->descTag.tagLocation) != UDF_I_LOCATION(inode).logicalBlockNum) { + if (eahd->descTag.tagIdent != + cpu_to_le16(TAG_IDENT_EAHD) || + le32_to_cpu(eahd->descTag.tagLocation) != + iinfo->i_location.logicalBlockNum) return NULL; - } } else { + struct udf_sb_info *sbi = UDF_SB(inode->i_sb); + size -= sizeof(struct extendedAttrHeaderDesc); - UDF_I_LENEATTR(inode) += sizeof(struct extendedAttrHeaderDesc); + iinfo->i_lenEAttr += + sizeof(struct extendedAttrHeaderDesc); eahd->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EAHD); - if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) + if (sbi->s_udfrev >= 0x0200) eahd->descTag.descVersion = cpu_to_le16(3); else eahd->descTag.descVersion = cpu_to_le16(2); - eahd->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb)); - eahd->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); + eahd->descTag.tagSerialNum = + cpu_to_le16(sbi->s_serial_number); + eahd->descTag.tagLocation = cpu_to_le32( + iinfo->i_location.logicalBlockNum); eahd->impAttrLocation = cpu_to_le32(0xFFFFFFFF); eahd->appAttrLocation = cpu_to_le32(0xFFFFFFFF); } - offset = UDF_I_LENEATTR(inode); + offset = iinfo->i_lenEAttr; if (type < 2048) { - if (le32_to_cpu(eahd->appAttrLocation) < UDF_I_LENEATTR(inode)) { - uint32_t aal = le32_to_cpu(eahd->appAttrLocation); + if (le32_to_cpu(eahd->appAttrLocation) < + iinfo->i_lenEAttr) { + uint32_t aal = + le32_to_cpu(eahd->appAttrLocation); memmove(&ea[offset - aal + size], &ea[aal], offset - aal); offset -= aal; - eahd->appAttrLocation = cpu_to_le32(aal + size); + eahd->appAttrLocation = + 
cpu_to_le32(aal + size); } - if (le32_to_cpu(eahd->impAttrLocation) < UDF_I_LENEATTR(inode)) { - uint32_t ial = le32_to_cpu(eahd->impAttrLocation); + if (le32_to_cpu(eahd->impAttrLocation) < + iinfo->i_lenEAttr) { + uint32_t ial = + le32_to_cpu(eahd->impAttrLocation); memmove(&ea[offset - ial + size], &ea[ial], offset - ial); offset -= ial; - eahd->impAttrLocation = cpu_to_le32(ial + size); + eahd->impAttrLocation = + cpu_to_le32(ial + size); } } else if (type < 65536) { - if (le32_to_cpu(eahd->appAttrLocation) < UDF_I_LENEATTR(inode)) { - uint32_t aal = le32_to_cpu(eahd->appAttrLocation); + if (le32_to_cpu(eahd->appAttrLocation) < + iinfo->i_lenEAttr) { + uint32_t aal = + le32_to_cpu(eahd->appAttrLocation); memmove(&ea[offset - aal + size], &ea[aal], offset - aal); offset -= aal; - eahd->appAttrLocation = cpu_to_le32(aal + size); + eahd->appAttrLocation = + cpu_to_le32(aal + size); } } /* rewrite CRC + checksum of eahd */ crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); eahd->descTag.descCRCLength = cpu_to_le16(crclen); eahd->descTag.descCRC = cpu_to_le16(udf_crc((char *)eahd + - sizeof(tag), crclen, 0)); - eahd->descTag.tagChecksum = 0; - for (i = 0; i < 16; i++) - if (i != 4) - eahd->descTag.tagChecksum += ((uint8_t *)&(eahd->descTag))[i]; - UDF_I_LENEATTR(inode) += size; + sizeof(tag), crclen, 0)); + eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); + iinfo->i_lenEAttr += size; return (struct genericFormat *)&ea[offset]; } - if (loc & 0x02) { - } + if (loc & 0x02) + ; return NULL; } @@ -143,18 +154,20 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, struct genericFormat *gaf; uint8_t *ea = NULL; uint32_t offset; + struct udf_inode_info *iinfo = UDF_I(inode); - ea = UDF_I_DATA(inode); + ea = iinfo->i_ext.i_data; - if (UDF_I_LENEATTR(inode)) { + if (iinfo->i_lenEAttr) { struct extendedAttrHeaderDesc *eahd; eahd = (struct extendedAttrHeaderDesc *)ea; /* check checksum/crc */ - if 
(le16_to_cpu(eahd->descTag.tagIdent) != TAG_IDENT_EAHD || - le32_to_cpu(eahd->descTag.tagLocation) != UDF_I_LOCATION(inode).logicalBlockNum) { + if (eahd->descTag.tagIdent != + cpu_to_le16(TAG_IDENT_EAHD) || + le32_to_cpu(eahd->descTag.tagLocation) != + iinfo->i_location.logicalBlockNum) return NULL; - } if (type < 2048) offset = sizeof(struct extendedAttrHeaderDesc); @@ -163,9 +176,10 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, else offset = le32_to_cpu(eahd->appAttrLocation); - while (offset < UDF_I_LENEATTR(inode)) { + while (offset < iinfo->i_lenEAttr) { gaf = (struct genericFormat *)&ea[offset]; - if (le32_to_cpu(gaf->attrType) == type && gaf->attrSubtype == subtype) + if (le32_to_cpu(gaf->attrType) == type && + gaf->attrSubtype == subtype) return gaf; else offset += le32_to_cpu(gaf->attrLength); @@ -186,21 +200,20 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, * Written, tested, and released. */ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, - uint32_t location, uint16_t * ident) + uint32_t location, uint16_t *ident) { tag *tag_p; struct buffer_head *bh = NULL; - register uint8_t checksum; - register int i; + struct udf_sb_info *sbi = UDF_SB(sb); /* Read the block */ if (block == 0xFFFFFFFF) return NULL; - bh = udf_tread(sb, block + UDF_SB_SESSION(sb)); + bh = udf_tread(sb, block + sbi->s_session); if (!bh) { udf_debug("block=%d, location=%d: read failed\n", - block + UDF_SB_SESSION(sb), location); + block + sbi->s_session, location); return NULL; } @@ -210,24 +223,20 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, if (location != le32_to_cpu(tag_p->tagLocation)) { udf_debug("location mismatch block %u, tag %u != %u\n", - block + UDF_SB_SESSION(sb), le32_to_cpu(tag_p->tagLocation), location); + block + sbi->s_session, + le32_to_cpu(tag_p->tagLocation), location); goto error_out; } /* Verify the tag checksum */ - checksum = 0U; - 
for (i = 0; i < 4; i++) - checksum += (uint8_t)(bh->b_data[i]); - for (i = 5; i < 16; i++) - checksum += (uint8_t)(bh->b_data[i]); - if (checksum != tag_p->tagChecksum) { + if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) { printk(KERN_ERR "udf: tag checksum failed block %d\n", block); goto error_out; } /* Verify the tag version */ - if (le16_to_cpu(tag_p->descVersion) != 0x0002U && - le16_to_cpu(tag_p->descVersion) != 0x0003U) { + if (tag_p->descVersion != cpu_to_le16(0x0002U) && + tag_p->descVersion != cpu_to_le16(0x0003U)) { udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n", le16_to_cpu(tag_p->descVersion), block); goto error_out; @@ -236,11 +245,11 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, /* Verify the descriptor CRC */ if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || le16_to_cpu(tag_p->descCRC) == udf_crc(bh->b_data + sizeof(tag), - le16_to_cpu(tag_p->descCRCLength), 0)) { + le16_to_cpu(tag_p->descCRCLength), 0)) return bh; - } + udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", - block + UDF_SB_SESSION(sb), le16_to_cpu(tag_p->descCRC), + block + sbi->s_session, le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); error_out: @@ -249,7 +258,7 @@ error_out: } struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, - uint32_t offset, uint16_t * ident) + uint32_t offset, uint16_t *ident) { return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset), loc.logicalBlockNum + offset, ident); @@ -258,17 +267,11 @@ struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, void udf_update_tag(char *data, int length) { tag *tptr = (tag *)data; - int i; - length -= sizeof(tag); - tptr->tagChecksum = 0; tptr->descCRCLength = cpu_to_le16(length); tptr->descCRC = cpu_to_le16(udf_crc(data + sizeof(tag), length, 0)); - - for (i = 0; i < 16; i++) - if (i != 4) - tptr->tagChecksum += (uint8_t)(data[i]); + tptr->tagChecksum = 
udf_tag_checksum(tptr); } void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, @@ -281,3 +284,14 @@ void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, tptr->tagLocation = cpu_to_le32(loc); udf_update_tag(data, length); } + +u8 udf_tag_checksum(const tag *t) +{ + u8 *data = (u8 *)t; + u8 checksum = 0; + int i; + for (i = 0; i < sizeof(tag); ++i) + if (i != 4) /* position of checksum */ + checksum += data[i]; + return checksum; +} diff --git a/fs/udf/namei.c b/fs/udf/namei.c index bec96a6b3343..112a5fb0b27b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -43,12 +43,10 @@ static inline int udf_match(int len1, const char *name1, int len2, int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh, - uint8_t * impuse, uint8_t * fileident) + uint8_t *impuse, uint8_t *fileident) { uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(tag); uint16_t crc; - uint8_t checksum = 0; - int i; int offset; uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse); uint8_t lfi = cfi->lengthFileIdent; @@ -56,7 +54,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, sizeof(struct fileIdentDesc); int adinicb = 0; - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) + if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) adinicb = 1; offset = fibh->soffset + sizeof(struct fileIdentDesc); @@ -68,7 +66,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, memcpy(fibh->ebh->b_data + offset, impuse, liu); } else { memcpy((uint8_t *)sfi->impUse, impuse, -offset); - memcpy(fibh->ebh->b_data, impuse - offset, liu + offset); + memcpy(fibh->ebh->b_data, impuse - offset, + liu + offset); } } @@ -80,8 +79,10 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, } else if (offset >= 0) { memcpy(fibh->ebh->b_data + offset, fileident, lfi); } else { - memcpy((uint8_t *)sfi->fileIdent + liu, fileident, -offset); - 
memcpy(fibh->ebh->b_data, fileident - offset, lfi + offset); + memcpy((uint8_t *)sfi->fileIdent + liu, fileident, + -offset); + memcpy(fibh->ebh->b_data, fileident - offset, + lfi + offset); } } @@ -101,27 +102,29 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, if (fibh->sbh == fibh->ebh) { crc = udf_crc((uint8_t *)sfi->impUse, - crclen + sizeof(tag) - sizeof(struct fileIdentDesc), crc); + crclen + sizeof(tag) - + sizeof(struct fileIdentDesc), crc); } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { - crc = udf_crc(fibh->ebh->b_data + sizeof(struct fileIdentDesc) + fibh->soffset, - crclen + sizeof(tag) - sizeof(struct fileIdentDesc), crc); + crc = udf_crc(fibh->ebh->b_data + + sizeof(struct fileIdentDesc) + + fibh->soffset, + crclen + sizeof(tag) - + sizeof(struct fileIdentDesc), + crc); } else { crc = udf_crc((uint8_t *)sfi->impUse, - -fibh->soffset - sizeof(struct fileIdentDesc), crc); + -fibh->soffset - sizeof(struct fileIdentDesc), + crc); crc = udf_crc(fibh->ebh->b_data, fibh->eoffset, crc); } cfi->descTag.descCRC = cpu_to_le16(crc); cfi->descTag.descCRCLength = cpu_to_le16(crclen); + cfi->descTag.tagChecksum = udf_tag_checksum(&cfi->descTag); - for (i = 0; i < 16; i++) { - if (i != 4) - checksum += ((uint8_t *)&cfi->descTag)[i]; - } - - cfi->descTag.tagChecksum = checksum; if (adinicb || (sizeof(struct fileIdentDesc) <= -fibh->soffset)) { - memcpy((uint8_t *)sfi, (uint8_t *)cfi, sizeof(struct fileIdentDesc)); + memcpy((uint8_t *)sfi, (uint8_t *)cfi, + sizeof(struct fileIdentDesc)); } else { memcpy((uint8_t *)sfi, (uint8_t *)cfi, -fibh->soffset); memcpy(fibh->ebh->b_data, (uint8_t *)cfi - fibh->soffset, @@ -155,26 +158,28 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, uint32_t elen; sector_t offset; struct extent_position epos = {}; + struct udf_inode_info *dinfo = UDF_I(dir); - size = (udf_ext0_offset(dir) + dir->i_size) >> 2; - f_pos = (udf_ext0_offset(dir) >> 2); + size = udf_ext0_offset(dir) + dir->i_size; + 
f_pos = udf_ext0_offset(dir); - fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) fibh->sbh = fibh->ebh = NULL; - } else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { + else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, + &epos, &eloc, &elen, &offset) == + (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) + else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } else { + } else offset = 0; - } - if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) { + fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); + if (!fibh->sbh) { brelse(epos.bh); return NULL; } @@ -183,7 +188,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, return NULL; } - while ((f_pos < size)) { + while (f_pos < size) { fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset); if (!fi) { @@ -202,14 +207,18 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, } else { int poffset; /* Unpaded ending offset */ - poffset = fibh->soffset + sizeof(struct fileIdentDesc) + liu + lfi; + poffset = fibh->soffset + sizeof(struct fileIdentDesc) + + liu + lfi; - if (poffset >= lfi) { - nameptr = (uint8_t *)(fibh->ebh->b_data + poffset - lfi); - } else { + if (poffset >= lfi) + nameptr = (uint8_t *)(fibh->ebh->b_data + + poffset - lfi); + else { nameptr = fname; - memcpy(nameptr, fi->fileIdent + liu, lfi - poffset); - memcpy(nameptr + lfi - 
poffset, fibh->ebh->b_data, poffset); + memcpy(nameptr, fi->fileIdent + liu, + lfi - poffset); + memcpy(nameptr + lfi - poffset, + fibh->ebh->b_data, poffset); } } @@ -226,11 +235,11 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, if (!lfi) continue; - if ((flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi))) { - if (udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) { - brelse(epos.bh); - return fi; - } + flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); + if (flen && udf_match(flen, fname, dentry->d_name.len, + dentry->d_name.name)) { + brelse(epos.bh); + return fi; } } @@ -291,16 +300,16 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, if (!strncmp(dentry->d_name.name, ".B=", 3)) { kernel_lb_addr lb = { .logicalBlockNum = 0, - .partitionReferenceNum = simple_strtoul(dentry->d_name.name + 3, - NULL, 0), + .partitionReferenceNum = + simple_strtoul(dentry->d_name.name + 3, + NULL, 0), }; inode = udf_iget(dir->i_sb, lb); if (!inode) { unlock_kernel(); return ERR_PTR(-EACCES); } - } - else + } else #endif /* UDF_RECOVERY */ if (udf_find_entry(dir, dentry, &fibh, &cfi)) { @@ -325,14 +334,14 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, struct udf_fileident_bh *fibh, struct fileIdentDesc *cfi, int *err) { - struct super_block *sb; + struct super_block *sb = dir->i_sb; struct fileIdentDesc *fi = NULL; char name[UDF_NAME_LEN], fname[UDF_NAME_LEN]; int namelen; loff_t f_pos; int flen; char *nameptr; - loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; + loff_t size = udf_ext0_offset(dir) + dir->i_size; int nfidlen; uint8_t lfi; uint16_t liu; @@ -341,16 +350,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, uint32_t elen; sector_t offset; struct extent_position epos = {}; - - sb = dir->i_sb; + struct udf_inode_info *dinfo; if (dentry) { if (!dentry->d_name.len) { *err = -EINVAL; return NULL; } - if (!(namelen = udf_put_filename(sb, dentry->d_name.name, name, 
- dentry->d_name.len))) { + namelen = udf_put_filename(sb, dentry->d_name.name, name, + dentry->d_name.len); + if (!namelen) { *err = -ENAMETOOLONG; return NULL; } @@ -360,39 +369,40 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3; - f_pos = (udf_ext0_offset(dir) >> 2); + f_pos = udf_ext0_offset(dir); - fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); + dinfo = UDF_I(dir); + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) fibh->sbh = fibh->ebh = NULL; - } else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { + else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, + &epos, &eloc, &elen, &offset) == + (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) + else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } else { + } else offset = 0; - } - if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) { + fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); + if (!fibh->sbh) { brelse(epos.bh); *err = -EIO; return NULL; } - block = UDF_I_LOCATION(dir).logicalBlockNum; - + block = dinfo->i_location.logicalBlockNum; } else { - block = udf_get_lb_pblock(dir->i_sb, UDF_I_LOCATION(dir), 0); + block = udf_get_lb_pblock(dir->i_sb, dinfo->i_location, 0); fibh->sbh = fibh->ebh = NULL; fibh->soffset = fibh->eoffset = sb->s_blocksize; goto add; } - while ((f_pos < size)) { + while (f_pos < size) { fi = udf_fileident_read(dir, &f_pos, fibh, 
cfi, &epos, &eloc, &elen, &offset); @@ -408,33 +418,39 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, liu = le16_to_cpu(cfi->lengthOfImpUse); lfi = cfi->lengthFileIdent; - if (fibh->sbh == fibh->ebh) { + if (fibh->sbh == fibh->ebh) nameptr = fi->fileIdent + liu; - } else { + else { int poffset; /* Unpaded ending offset */ - poffset = fibh->soffset + sizeof(struct fileIdentDesc) + liu + lfi; + poffset = fibh->soffset + sizeof(struct fileIdentDesc) + + liu + lfi; - if (poffset >= lfi) { - nameptr = (char *)(fibh->ebh->b_data + poffset - lfi); - } else { + if (poffset >= lfi) + nameptr = (char *)(fibh->ebh->b_data + + poffset - lfi); + else { nameptr = fname; - memcpy(nameptr, fi->fileIdent + liu, lfi - poffset); - memcpy(nameptr + lfi - poffset, fibh->ebh->b_data, poffset); + memcpy(nameptr, fi->fileIdent + liu, + lfi - poffset); + memcpy(nameptr + lfi - poffset, + fibh->ebh->b_data, poffset); } } if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { - if (((sizeof(struct fileIdentDesc) + liu + lfi + 3) & ~3) == nfidlen) { + if (((sizeof(struct fileIdentDesc) + + liu + lfi + 3) & ~3) == nfidlen) { brelse(epos.bh); cfi->descTag.tagSerialNum = cpu_to_le16(1); cfi->fileVersionNum = cpu_to_le16(1); cfi->fileCharacteristics = 0; cfi->lengthFileIdent = namelen; cfi->lengthOfImpUse = cpu_to_le16(0); - if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { + if (!udf_write_fi(dir, cfi, fi, fibh, NULL, + name)) return fi; - } else { + else { *err = -EIO; return NULL; } @@ -444,8 +460,9 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, if (!lfi || !dentry) continue; - if ((flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi)) && - udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) { + flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); + if (flen && udf_match(flen, fname, dentry->d_name.len, + dentry->d_name.name)) { if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); @@ -456,29 +473,34 @@ static 
struct fileIdentDesc *udf_add_entry(struct inode *dir, } add: + if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) + epos.offset -= sizeof(short_ad); + else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) + epos.offset -= sizeof(long_ad); + udf_write_aext(dir, &epos, eloc, elen, 1); + } f_pos += nfidlen; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB && + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && sb->s_blocksize - fibh->eoffset < nfidlen) { brelse(epos.bh); epos.bh = NULL; fibh->soffset -= udf_ext0_offset(dir); fibh->eoffset -= udf_ext0_offset(dir); - f_pos -= (udf_ext0_offset(dir) >> 2); + f_pos -= udf_ext0_offset(dir); if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); - if (!(fibh->sbh = fibh->ebh = udf_expand_dir_adinicb(dir, &block, err))) + fibh->sbh = fibh->ebh = + udf_expand_dir_adinicb(dir, &block, err); + if (!fibh->sbh) return NULL; - epos.block = UDF_I_LOCATION(dir); - eloc.logicalBlockNum = block; - eloc.partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; - elen = dir->i_sb->s_blocksize; + epos.block = dinfo->i_location; epos.offset = udf_file_entry_alloc_offset(dir); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) - epos.offset += sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) - epos.offset += sizeof(long_ad); + /* Load extent udf_expand_dir_adinicb() has created */ + udf_current_aext(dir, &epos, &eloc, &elen, 1); } if (sb->s_blocksize - fibh->eoffset >= nfidlen) { @@ -489,15 +511,19 @@ add: fibh->sbh = fibh->ebh; } - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { - block = UDF_I_LOCATION(dir).logicalBlockNum; - fi = (struct fileIdentDesc *)(UDF_I_DATA(dir) + fibh->soffset - - udf_ext0_offset(dir) + - UDF_I_LENEATTR(dir)); + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + block = dinfo->i_location.logicalBlockNum; + fi = (struct fileIdentDesc *) + 
(dinfo->i_ext.i_data + + fibh->soffset - + udf_ext0_offset(dir) + + dinfo->i_lenEAttr); } else { - block = eloc.logicalBlockNum + ((elen - 1) >> - dir->i_sb->s_blocksize_bits); - fi = (struct fileIdentDesc *)(fibh->sbh->b_data + fibh->soffset); + block = eloc.logicalBlockNum + + ((elen - 1) >> + dir->i_sb->s_blocksize_bits); + fi = (struct fileIdentDesc *) + (fibh->sbh->b_data + fibh->soffset); } } else { fibh->soffset = fibh->eoffset - sb->s_blocksize; @@ -509,7 +535,8 @@ add: block = eloc.logicalBlockNum + ((elen - 1) >> dir->i_sb->s_blocksize_bits); - fibh->ebh = udf_bread(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 1, err); + fibh->ebh = udf_bread(dir, + f_pos >> dir->i_sb->s_blocksize_bits, 1, err); if (!fibh->ebh) { brelse(epos.bh); brelse(fibh->sbh); @@ -521,32 +548,34 @@ add: (EXT_RECORDED_ALLOCATED >> 30)) { block = eloc.logicalBlockNum + ((elen - 1) >> dir->i_sb->s_blocksize_bits); - } else { + } else block++; - } brelse(fibh->sbh); fibh->sbh = fibh->ebh; fi = (struct fileIdentDesc *)(fibh->sbh->b_data); } else { fi = (struct fileIdentDesc *) - (fibh->sbh->b_data + sb->s_blocksize + fibh->soffset); + (fibh->sbh->b_data + sb->s_blocksize + + fibh->soffset); } } memset(cfi, 0, sizeof(struct fileIdentDesc)); - if (UDF_SB_UDFREV(sb) >= 0x0200) - udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, sizeof(tag)); + if (UDF_SB(sb)->s_udfrev >= 0x0200) + udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, + sizeof(tag)); else - udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, sizeof(tag)); + udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, + sizeof(tag)); cfi->fileVersionNum = cpu_to_le16(1); cfi->lengthFileIdent = namelen; cfi->lengthOfImpUse = cpu_to_le16(0); if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { brelse(epos.bh); dir->i_size += nfidlen; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - UDF_I_LENALLOC(dir) += nfidlen; + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + dinfo->i_lenAlloc += nfidlen; mark_inode_dirty(dir); 
return fi; } else { @@ -578,6 +607,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; struct fileIdentDesc cfi, *fi; int err; + struct udf_inode_info *iinfo; lock_kernel(); inode = udf_new_inode(dir, mode, &err); @@ -586,7 +616,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, return err; } - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) + iinfo = UDF_I(inode); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) inode->i_data.a_ops = &udf_adinicb_aops; else inode->i_data.a_ops = &udf_aops; @@ -595,7 +626,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, inode->i_mode = mode; mark_inode_dirty(inode); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { + fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); + if (!fi) { inode->i_nlink--; mark_inode_dirty(inode); iput(inode); @@ -603,13 +635,12 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); + cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); + cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(dir); - } if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -626,6 +657,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, struct udf_fileident_bh fibh; struct fileIdentDesc cfi, *fi; int err; + struct udf_inode_info *iinfo; if (!old_valid_dev(rdev)) return -EINVAL; @@ -636,9 +668,11 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!inode) goto out; + iinfo = UDF_I(inode); 
inode->i_uid = current->fsuid; init_special_inode(inode, mode, rdev); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { + fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); + if (!fi) { inode->i_nlink--; mark_inode_dirty(inode); iput(inode); @@ -646,13 +680,12 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); + cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); + cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(dir); - } mark_inode_dirty(inode); if (fibh.sbh != fibh.ebh) @@ -672,6 +705,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct udf_fileident_bh fibh; struct fileIdentDesc cfi, *fi; int err; + struct udf_inode_info *dinfo = UDF_I(dir); + struct udf_inode_info *iinfo; lock_kernel(); err = -EMLINK; @@ -683,9 +718,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!inode) goto out; + iinfo = UDF_I(inode); inode->i_op = &udf_dir_inode_operations; inode->i_fop = &udf_dir_operations; - if (!(fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err))) { + fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); + if (!fi) { inode->i_nlink--; mark_inode_dirty(inode); iput(inode); @@ -693,10 +730,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) } inode->i_nlink = 2; cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(dir)); + cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - 
cpu_to_le32(UDF_I_UNIQUE(dir) & 0x00000000FFFFFFFFUL); - cfi.fileCharacteristics = FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; + cpu_to_le32(dinfo->i_unique & 0x00000000FFFFFFFFUL); + cfi.fileCharacteristics = + FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); brelse(fibh.sbh); inode->i_mode = S_IFDIR | mode; @@ -704,16 +742,17 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_mode |= S_ISGID; mark_inode_dirty(inode); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { + fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); + if (!fi) { inode->i_nlink = 0; mark_inode_dirty(inode); iput(inode); goto out; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); + cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); + cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); inc_nlink(dir); @@ -734,32 +773,33 @@ static int empty_dir(struct inode *dir) struct fileIdentDesc *fi, cfi; struct udf_fileident_bh fibh; loff_t f_pos; - loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; + loff_t size = udf_ext0_offset(dir) + dir->i_size; int block; kernel_lb_addr eloc; uint32_t elen; sector_t offset; struct extent_position epos = {}; + struct udf_inode_info *dinfo = UDF_I(dir); - f_pos = (udf_ext0_offset(dir) >> 2); + f_pos = udf_ext0_offset(dir); + fibh.soffset = fibh.eoffset = f_pos & (dir->i_sb->s_blocksize - 1); - fibh.soffset = fibh.eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; - - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) fibh.sbh = fibh.ebh = NULL; - } else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits 
- 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { + else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, + &epos, &eloc, &elen, &offset) == + (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) + if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) + else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } else { + } else offset = 0; - } - if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { + fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block); + if (!fibh.sbh) { brelse(epos.bh); return 0; } @@ -768,7 +808,7 @@ static int empty_dir(struct inode *dir) return 0; } - while ((f_pos < size)) { + while (f_pos < size) { fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); if (!fi) { @@ -828,7 +868,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_size = 0; inode_dec_link_count(dir); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + inode->i_ctime = dir->i_ctime = dir->i_mtime = + current_fs_time(dir->i_sb); mark_inode_dirty(dir); end_rmdir: @@ -901,36 +942,42 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, int block; char name[UDF_NAME_LEN]; int namelen; + struct buffer_head *bh; + struct udf_inode_info *iinfo; lock_kernel(); - if (!(inode = udf_new_inode(dir, S_IFLNK, &err))) + inode = udf_new_inode(dir, S_IFLNK, &err); + if (!inode) goto out; + iinfo = UDF_I(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &page_symlink_inode_operations; - if (UDF_I_ALLOCTYPE(inode) != ICBTAG_FLAG_AD_IN_ICB) { + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { kernel_lb_addr eloc; uint32_t elen; block = 
udf_new_block(inode->i_sb, inode, - UDF_I_LOCATION(inode).partitionReferenceNum, - UDF_I_LOCATION(inode).logicalBlockNum, &err); + iinfo->i_location.partitionReferenceNum, + iinfo->i_location.logicalBlockNum, &err); if (!block) goto out_no_entry; - epos.block = UDF_I_LOCATION(inode); + epos.block = iinfo->i_location; epos.offset = udf_file_entry_alloc_offset(inode); epos.bh = NULL; eloc.logicalBlockNum = block; - eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; + eloc.partitionReferenceNum = + iinfo->i_location.partitionReferenceNum; elen = inode->i_sb->s_blocksize; - UDF_I_LENEXTENTS(inode) = elen; + iinfo->i_lenExtents = elen; udf_add_aext(inode, &epos, eloc, elen, 0); brelse(epos.bh); block = udf_get_pblock(inode->i_sb, block, - UDF_I_LOCATION(inode).partitionReferenceNum, 0); + iinfo->i_location.partitionReferenceNum, + 0); epos.bh = udf_tread(inode->i_sb, block); lock_buffer(epos.bh); memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize); @@ -938,9 +985,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, unlock_buffer(epos.bh); mark_buffer_dirty_inode(epos.bh, inode); ea = epos.bh->b_data + udf_ext0_offset(inode); - } else { - ea = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); - } + } else + ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr; eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode); pc = (struct pathComponent *)ea; @@ -977,7 +1023,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, if (compstart[0] == '.') { if ((symname - compstart) == 1) pc->componentType = 4; - else if ((symname - compstart) == 2 && compstart[1] == '.') + else if ((symname - compstart) == 2 && + compstart[1] == '.') pc->componentType = 3; } @@ -987,7 +1034,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, if (!namelen) goto out_no_entry; - if (elen + sizeof(struct pathComponent) + namelen > eoffset) + if (elen + sizeof(struct pathComponent) + namelen > + eoffset) goto out_no_entry; else 
pc->lengthComponentIdent = namelen; @@ -1006,30 +1054,34 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, brelse(epos.bh); inode->i_size = elen; - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) - UDF_I_LENALLOC(inode) = inode->i_size; + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + iinfo->i_lenAlloc = inode->i_size; mark_inode_dirty(inode); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) + fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); + if (!fi) goto out_no_entry; cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - if (UDF_SB_LVIDBH(inode->i_sb)) { + cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); + bh = UDF_SB(inode->i_sb)->s_lvid_bh; + if (bh) { + struct logicalVolIntegrityDesc *lvid = + (struct logicalVolIntegrityDesc *)bh->b_data; struct logicalVolHeaderDesc *lvhd; uint64_t uniqueID; - lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)->logicalVolContentsUse); + lvhd = (struct logicalVolHeaderDesc *) + lvid->logicalVolContentsUse; uniqueID = le64_to_cpu(lvhd->uniqueID); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); if (!(++uniqueID & 0x00000000FFFFFFFFUL)) uniqueID += 16; lvhd->uniqueID = cpu_to_le64(uniqueID); - mark_buffer_dirty(UDF_SB_LVIDBH(inode->i_sb)); + mark_buffer_dirty(bh); } udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(dir); - } if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -1053,6 +1105,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, struct udf_fileident_bh fibh; struct fileIdentDesc cfi, *fi; int err; + struct buffer_head *bh; lock_kernel(); if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { @@ -1060,28 +1113,32 @@ static int udf_link(struct dentry *old_dentry, struct 
inode *dir, return -EMLINK; } - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { + fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); + if (!fi) { unlock_kernel(); return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - if (UDF_SB_LVIDBH(inode->i_sb)) { + cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location); + bh = UDF_SB(inode->i_sb)->s_lvid_bh; + if (bh) { + struct logicalVolIntegrityDesc *lvid = + (struct logicalVolIntegrityDesc *)bh->b_data; struct logicalVolHeaderDesc *lvhd; uint64_t uniqueID; - lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)->logicalVolContentsUse); + lvhd = (struct logicalVolHeaderDesc *) + (lvid->logicalVolContentsUse); uniqueID = le64_to_cpu(lvhd->uniqueID); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); if (!(++uniqueID & 0x00000000FFFFFFFFUL)) uniqueID += 16; lvhd->uniqueID = cpu_to_le64(uniqueID); - mark_buffer_dirty(UDF_SB_LVIDBH(inode->i_sb)); + mark_buffer_dirty(bh); } udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { + if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(dir); - } if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -1105,13 +1162,16 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct udf_fileident_bh ofibh, nfibh; - struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = NULL, ocfi, ncfi; + struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = NULL; + struct fileIdentDesc ocfi, ncfi; struct buffer_head *dir_bh = NULL; int retval = -ENOENT; kernel_lb_addr tloc; + struct udf_inode_info *old_iinfo = UDF_I(old_inode); lock_kernel(); - if ((ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi))) { + ofi = udf_find_entry(old_dir, old_dentry, &ofibh, 
&ocfi); + if (ofi) { if (ofibh.sbh != ofibh.ebh) brelse(ofibh.ebh); brelse(ofibh.sbh); @@ -1131,7 +1191,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, } } if (S_ISDIR(old_inode->i_mode)) { - uint32_t offset = udf_ext0_offset(old_inode); + int offset = udf_ext0_offset(old_inode); if (new_inode) { retval = -ENOTEMPTY; @@ -1139,30 +1199,36 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, goto end_rename; } retval = -EIO; - if (UDF_I_ALLOCTYPE(old_inode) == ICBTAG_FLAG_AD_IN_ICB) { - dir_fi = udf_get_fileident(UDF_I_DATA(old_inode) - - (UDF_I_EFE(old_inode) ? - sizeof(struct extendedFileEntry) : - sizeof(struct fileEntry)), - old_inode->i_sb->s_blocksize, &offset); + if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + dir_fi = udf_get_fileident( + old_iinfo->i_ext.i_data - + (old_iinfo->i_efe ? + sizeof(struct extendedFileEntry) : + sizeof(struct fileEntry)), + old_inode->i_sb->s_blocksize, &offset); } else { dir_bh = udf_bread(old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; - dir_fi = udf_get_fileident(dir_bh->b_data, old_inode->i_sb->s_blocksize, &offset); + dir_fi = udf_get_fileident(dir_bh->b_data, + old_inode->i_sb->s_blocksize, &offset); } if (!dir_fi) goto end_rename; tloc = lelb_to_cpu(dir_fi->icb.extLocation); - if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) != old_dir->i_ino) + if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) != + old_dir->i_ino) goto end_rename; retval = -EMLINK; - if (!new_inode && new_dir->i_nlink >= (256 << sizeof(new_dir->i_nlink)) - 1) + if (!new_inode && + new_dir->i_nlink >= + (256 << sizeof(new_dir->i_nlink)) - 1) goto end_rename; } if (!nfi) { - nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, &retval); + nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, + &retval); if (!nfi) goto end_rename; } @@ -1194,18 +1260,19 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, mark_inode_dirty(old_dir); if (dir_fi) { - 
dir_fi->icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(new_dir)); - udf_update_tag((char *)dir_fi, (sizeof(struct fileIdentDesc) + - le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3); - if (UDF_I_ALLOCTYPE(old_inode) == ICBTAG_FLAG_AD_IN_ICB) { + dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location); + udf_update_tag((char *)dir_fi, + (sizeof(struct fileIdentDesc) + + le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3); + if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(old_inode); - } else { + else mark_buffer_dirty_inode(dir_bh, old_inode); - } + inode_dec_link_count(old_dir); - if (new_inode) { + if (new_inode) inode_dec_link_count(new_inode); - } else { + else { inc_nlink(new_dir); mark_inode_dirty(new_dir); } diff --git a/fs/udf/partition.c b/fs/udf/partition.c index aaab24c8c498..fc533345ab89 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -31,15 +31,18 @@ inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) { - if (partition >= UDF_SB_NUMPARTS(sb)) { - udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n", - block, partition, offset); + struct udf_sb_info *sbi = UDF_SB(sb); + struct udf_part_map *map; + if (partition >= sbi->s_partitions) { + udf_debug("block=%d, partition=%d, offset=%d: " + "invalid partition\n", block, partition, offset); return 0xFFFFFFFF; } - if (UDF_SB_PARTFUNC(sb, partition)) - return UDF_SB_PARTFUNC(sb, partition)(sb, block, partition, offset); + map = &sbi->s_partmaps[partition]; + if (map->s_partition_func) + return map->s_partition_func(sb, block, partition, offset); else - return UDF_SB_PARTROOT(sb, partition) + block + offset; + return map->s_partition_root + block + offset; } uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, @@ -49,12 +52,18 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, uint32_t newblock; uint32_t index; uint32_t loc; + struct udf_sb_info *sbi = UDF_SB(sb); + 
struct udf_part_map *map; + struct udf_virtual_data *vdata; + struct udf_inode_info *iinfo; - index = (sb->s_blocksize - UDF_SB_TYPEVIRT(sb,partition).s_start_offset) / sizeof(uint32_t); + map = &sbi->s_partmaps[partition]; + vdata = &map->s_type_specific.s_virtual; + index = (sb->s_blocksize - vdata->s_start_offset) / sizeof(uint32_t); - if (block > UDF_SB_TYPEVIRT(sb,partition).s_num_entries) { - udf_debug("Trying to access block beyond end of VAT (%d max %d)\n", - block, UDF_SB_TYPEVIRT(sb,partition).s_num_entries); + if (block > vdata->s_num_entries) { + udf_debug("Trying to access block beyond end of VAT " + "(%d max %d)\n", block, vdata->s_num_entries); return 0xFFFFFFFF; } @@ -64,12 +73,13 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, index = block % (sb->s_blocksize / sizeof(uint32_t)); } else { newblock = 0; - index = UDF_SB_TYPEVIRT(sb,partition).s_start_offset / sizeof(uint32_t) + block; + index = vdata->s_start_offset / sizeof(uint32_t) + block; } - loc = udf_block_map(UDF_SB_VAT(sb), newblock); + loc = udf_block_map(sbi->s_vat_inode, newblock); - if (!(bh = sb_bread(sb, loc))) { + bh = sb_bread(sb, loc); + if (!bh) { udf_debug("get_pblock(UDF_VIRTUAL_MAP:%p,%d,%d) VAT: %d[%d]\n", sb, block, partition, loc, index); return 0xFFFFFFFF; @@ -79,50 +89,61 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, brelse(bh); - if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition) { + iinfo = UDF_I(sbi->s_vat_inode); + if (iinfo->i_location.partitionReferenceNum == partition) { udf_debug("recursive call to udf_get_pblock!\n"); return 0xFFFFFFFF; } return udf_get_pblock(sb, loc, - UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum, + iinfo->i_location.partitionReferenceNum, offset); } -inline uint32_t udf_get_pblock_virt20(struct super_block * sb, uint32_t block, +inline uint32_t udf_get_pblock_virt20(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) { return 
udf_get_pblock_virt15(sb, block, partition, offset); } -uint32_t udf_get_pblock_spar15(struct super_block * sb, uint32_t block, +uint32_t udf_get_pblock_spar15(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) { int i; struct sparingTable *st = NULL; - uint32_t packet = (block + offset) & ~(UDF_SB_TYPESPAR(sb,partition).s_packet_len - 1); + struct udf_sb_info *sbi = UDF_SB(sb); + struct udf_part_map *map; + uint32_t packet; + struct udf_sparing_data *sdata; + + map = &sbi->s_partmaps[partition]; + sdata = &map->s_type_specific.s_sparing; + packet = (block + offset) & ~(sdata->s_packet_len - 1); for (i = 0; i < 4; i++) { - if (UDF_SB_TYPESPAR(sb,partition).s_spar_map[i] != NULL) { - st = (struct sparingTable *)UDF_SB_TYPESPAR(sb,partition).s_spar_map[i]->b_data; + if (sdata->s_spar_map[i] != NULL) { + st = (struct sparingTable *) + sdata->s_spar_map[i]->b_data; break; } } if (st) { for (i = 0; i < le16_to_cpu(st->reallocationTableLen); i++) { - if (le32_to_cpu(st->mapEntry[i].origLocation) >= 0xFFFFFFF0) { + struct sparingEntry *entry = &st->mapEntry[i]; + u32 origLoc = le32_to_cpu(entry->origLocation); + if (origLoc >= 0xFFFFFFF0) break; - } else if (le32_to_cpu(st->mapEntry[i].origLocation) == packet) { - return le32_to_cpu(st->mapEntry[i].mappedLocation) + - ((block + offset) & (UDF_SB_TYPESPAR(sb,partition).s_packet_len - 1)); - } else if (le32_to_cpu(st->mapEntry[i].origLocation) > packet) { + else if (origLoc == packet) + return le32_to_cpu(entry->mappedLocation) + + ((block + offset) & + (sdata->s_packet_len - 1)); + else if (origLoc > packet) break; - } } } - return UDF_SB_PARTROOT(sb,partition) + block + offset; + return map->s_partition_root + block + offset; } int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) @@ -132,69 +153,109 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) struct sparingEntry mapEntry; uint32_t packet; int i, j, k, l; + struct udf_sb_info *sbi 
= UDF_SB(sb); + u16 reallocationTableLen; + struct buffer_head *bh; - for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) { - if (old_block > UDF_SB_PARTROOT(sb,i) && - old_block < UDF_SB_PARTROOT(sb,i) + UDF_SB_PARTLEN(sb,i)) { - sdata = &UDF_SB_TYPESPAR(sb,i); - packet = (old_block - UDF_SB_PARTROOT(sb,i)) & ~(sdata->s_packet_len - 1); + for (i = 0; i < sbi->s_partitions; i++) { + struct udf_part_map *map = &sbi->s_partmaps[i]; + if (old_block > map->s_partition_root && + old_block < map->s_partition_root + map->s_partition_len) { + sdata = &map->s_type_specific.s_sparing; + packet = (old_block - map->s_partition_root) & + ~(sdata->s_packet_len - 1); - for (j = 0; j < 4; j++) { - if (UDF_SB_TYPESPAR(sb,i).s_spar_map[j] != NULL) { - st = (struct sparingTable *)sdata->s_spar_map[j]->b_data; + for (j = 0; j < 4; j++) + if (sdata->s_spar_map[j] != NULL) { + st = (struct sparingTable *) + sdata->s_spar_map[j]->b_data; break; } - } if (!st) return 1; - for (k = 0; k < le16_to_cpu(st->reallocationTableLen); k++) { - if (le32_to_cpu(st->mapEntry[k].origLocation) == 0xFFFFFFFF) { + reallocationTableLen = + le16_to_cpu(st->reallocationTableLen); + for (k = 0; k < reallocationTableLen; k++) { + struct sparingEntry *entry = &st->mapEntry[k]; + u32 origLoc = le32_to_cpu(entry->origLocation); + + if (origLoc == 0xFFFFFFFF) { for (; j < 4; j++) { - if (sdata->s_spar_map[j]) { - st = (struct sparingTable *)sdata->s_spar_map[j]->b_data; - st->mapEntry[k].origLocation = cpu_to_le32(packet); - udf_update_tag((char *)st, sizeof(struct sparingTable) + le16_to_cpu(st->reallocationTableLen) * sizeof(struct sparingEntry)); - mark_buffer_dirty(sdata->s_spar_map[j]); - } + int len; + bh = sdata->s_spar_map[j]; + if (!bh) + continue; + + st = (struct sparingTable *) + bh->b_data; + entry->origLocation = + cpu_to_le32(packet); + len = + sizeof(struct sparingTable) + + reallocationTableLen * + sizeof(struct sparingEntry); + udf_update_tag((char *)st, len); + mark_buffer_dirty(bh); } - *new_block = 
le32_to_cpu(st->mapEntry[k].mappedLocation) + - ((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1)); + *new_block = le32_to_cpu( + entry->mappedLocation) + + ((old_block - + map->s_partition_root) & + (sdata->s_packet_len - 1)); return 0; - } else if (le32_to_cpu(st->mapEntry[k].origLocation) == packet) { - *new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) + - ((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1)); + } else if (origLoc == packet) { + *new_block = le32_to_cpu( + entry->mappedLocation) + + ((old_block - + map->s_partition_root) & + (sdata->s_packet_len - 1)); return 0; - } else if (le32_to_cpu(st->mapEntry[k].origLocation) > packet) { + } else if (origLoc > packet) break; - } } - for (l = k; l < le16_to_cpu(st->reallocationTableLen); l++) { - if (le32_to_cpu(st->mapEntry[l].origLocation) == 0xFFFFFFFF) { - for (; j < 4; j++) { - if (sdata->s_spar_map[j]) { - st = (struct sparingTable *)sdata->s_spar_map[j]->b_data; - mapEntry = st->mapEntry[l]; - mapEntry.origLocation = cpu_to_le32(packet); - memmove(&st->mapEntry[k + 1], &st->mapEntry[k], (l - k) * sizeof(struct sparingEntry)); - st->mapEntry[k] = mapEntry; - udf_update_tag((char *)st, sizeof(struct sparingTable) + le16_to_cpu(st->reallocationTableLen) * sizeof(struct sparingEntry)); - mark_buffer_dirty(sdata->s_spar_map[j]); - } - } - *new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) + - ((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1)); - return 0; + for (l = k; l < reallocationTableLen; l++) { + struct sparingEntry *entry = &st->mapEntry[l]; + u32 origLoc = le32_to_cpu(entry->origLocation); + + if (origLoc != 0xFFFFFFFF) + continue; + + for (; j < 4; j++) { + bh = sdata->s_spar_map[j]; + if (!bh) + continue; + + st = (struct sparingTable *)bh->b_data; + mapEntry = st->mapEntry[l]; + mapEntry.origLocation = + cpu_to_le32(packet); + memmove(&st->mapEntry[k + 1], + &st->mapEntry[k], + (l - k) * + sizeof(struct sparingEntry)); + st->mapEntry[k] = 
mapEntry; + udf_update_tag((char *)st, + sizeof(struct sparingTable) + + reallocationTableLen * + sizeof(struct sparingEntry)); + mark_buffer_dirty(bh); } + *new_block = + le32_to_cpu( + st->mapEntry[k].mappedLocation) + + ((old_block - map->s_partition_root) & + (sdata->s_packet_len - 1)); + return 0; } return 1; } /* if old_block */ } - if (i == UDF_SB_NUMPARTS(sb)) { + if (i == sbi->s_partitions) { /* outside of partitions */ /* for now, fail =) */ return 1; diff --git a/fs/udf/super.c b/fs/udf/super.c index 4360c7a05743..f3ac4abfc946 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -33,8 +33,8 @@ * 10/17/98 added freespace count for "df" * 11/11/98 gr added novrs option * 11/26/98 dgb added fileset,anchor mount options - * 12/06/98 blf really hosed things royally. vat/sparing support. sequenced vol descs - * rewrote option handling based on isofs + * 12/06/98 blf really hosed things royally. vat/sparing support. sequenced + * vol descs. rewrote option handling based on isofs * 12/20/98 find the free space bitmap (if it exists) */ @@ -52,6 +52,9 @@ #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/vmalloc.h> +#include <linux/errno.h> +#include <linux/mount.h> +#include <linux/seq_file.h> #include <asm/byteorder.h> #include <linux/udf_fs.h> @@ -70,6 +73,8 @@ #define VDS_POS_TERMINATING_DESC 6 #define VDS_POS_LENGTH 7 +#define UDF_DEFAULT_BLOCKSIZE 2048 + static char error_buf[1024]; /* These are the "meat" - everything else is stuffing */ @@ -94,6 +99,17 @@ static void udf_open_lvid(struct super_block *); static void udf_close_lvid(struct super_block *); static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct dentry *, struct kstatfs *); +static int udf_show_options(struct seq_file *, struct vfsmount *); + +struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) +{ + struct logicalVolIntegrityDesc *lvid = + (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; + __u32 
number_of_partitions = le32_to_cpu(lvid->numOfPartitions); + __u32 offset = number_of_partitions * 2 * + sizeof(uint32_t)/sizeof(uint8_t); + return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]); +} /* UDF filesystem type */ static int udf_get_sb(struct file_system_type *fs_type, @@ -116,7 +132,7 @@ static struct kmem_cache *udf_inode_cachep; static struct inode *udf_alloc_inode(struct super_block *sb) { struct udf_inode_info *ei; - ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL); if (!ei) return NULL; @@ -170,6 +186,7 @@ static const struct super_operations udf_sb_ops = { .write_super = udf_write_super, .statfs = udf_statfs, .remount_fs = udf_remount_fs, + .show_options = udf_show_options, }; struct udf_options { @@ -218,6 +235,79 @@ static void __exit exit_udf_fs(void) module_init(init_udf_fs) module_exit(exit_udf_fs) +static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count) +{ + struct udf_sb_info *sbi = UDF_SB(sb); + + sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), + GFP_KERNEL); + if (!sbi->s_partmaps) { + udf_error(sb, __FUNCTION__, + "Unable to allocate space for %d partition maps", + count); + sbi->s_partitions = 0; + return -ENOMEM; + } + + sbi->s_partitions = count; + return 0; +} + +static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt) +{ + struct super_block *sb = mnt->mnt_sb; + struct udf_sb_info *sbi = UDF_SB(sb); + + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) + seq_puts(seq, ",nostrict"); + if (sb->s_blocksize != UDF_DEFAULT_BLOCKSIZE) + seq_printf(seq, ",bs=%lu", sb->s_blocksize); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE)) + seq_puts(seq, ",unhide"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE)) + seq_puts(seq, ",undelete"); + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_USE_AD_IN_ICB)) + seq_puts(seq, ",noadinicb"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_USE_SHORT_AD)) + seq_puts(seq, ",shortad"); + if 
(UDF_QUERY_FLAG(sb, UDF_FLAG_UID_FORGET)) + seq_puts(seq, ",uid=forget"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_IGNORE)) + seq_puts(seq, ",uid=ignore"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_FORGET)) + seq_puts(seq, ",gid=forget"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) + seq_puts(seq, ",gid=ignore"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) + seq_printf(seq, ",uid=%u", sbi->s_uid); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) + seq_printf(seq, ",gid=%u", sbi->s_gid); + if (sbi->s_umask != 0) + seq_printf(seq, ",umask=%o", sbi->s_umask); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) + seq_printf(seq, ",session=%u", sbi->s_session); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) + seq_printf(seq, ",lastblock=%u", sbi->s_last_block); + /* + * s_anchor[2] could be zeroed out in case there is no anchor + * in the specified block, but then the "anchor=N" option + * originally given by the user wasn't effective, so it's OK + * if we don't show it. + */ + if (sbi->s_anchor[2] != 0) + seq_printf(seq, ",anchor=%u", sbi->s_anchor[2]); + /* + * volume, partition, fileset and rootdir seem to be ignored + * currently + */ + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) + seq_puts(seq, ",utf8"); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map) + seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset); + + return 0; +} + /* * udf_parse_options * @@ -310,13 +400,14 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static int udf_parse_options(char *options, struct udf_options *uopt) +static int udf_parse_options(char *options, struct udf_options *uopt, + bool remount) { char *p; int option; uopt->novrs = 0; - uopt->blocksize = 2048; + uopt->blocksize = UDF_DEFAULT_BLOCKSIZE; uopt->partition = 0xFFFF; uopt->session = 0xFFFFFFFF; uopt->lastblock = 0; @@ -386,11 +477,15 @@ static int udf_parse_options(char *options, struct udf_options *uopt) if (match_int(args, &option)) return 0; uopt->session = option; + if (!remount) + uopt->flags |= (1 << 
UDF_FLAG_SESSION_SET); break; case Opt_lastblock: if (match_int(args, &option)) return 0; uopt->lastblock = option; + if (!remount) + uopt->flags |= (1 << UDF_FLAG_LASTBLOCK_SET); break; case Opt_anchor: if (match_int(args, &option)) @@ -447,7 +542,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt) return 1; } -void udf_write_super(struct super_block *sb) +static void udf_write_super(struct super_block *sb) { lock_kernel(); @@ -461,22 +556,23 @@ void udf_write_super(struct super_block *sb) static int udf_remount_fs(struct super_block *sb, int *flags, char *options) { struct udf_options uopt; + struct udf_sb_info *sbi = UDF_SB(sb); - uopt.flags = UDF_SB(sb)->s_flags; - uopt.uid = UDF_SB(sb)->s_uid; - uopt.gid = UDF_SB(sb)->s_gid; - uopt.umask = UDF_SB(sb)->s_umask; + uopt.flags = sbi->s_flags; + uopt.uid = sbi->s_uid; + uopt.gid = sbi->s_gid; + uopt.umask = sbi->s_umask; - if (!udf_parse_options(options, &uopt)) + if (!udf_parse_options(options, &uopt, true)) return -EINVAL; - UDF_SB(sb)->s_flags = uopt.flags; - UDF_SB(sb)->s_uid = uopt.uid; - UDF_SB(sb)->s_gid = uopt.gid; - UDF_SB(sb)->s_umask = uopt.umask; + sbi->s_flags = uopt.flags; + sbi->s_uid = uopt.uid; + sbi->s_gid = uopt.gid; + sbi->s_umask = uopt.umask; - if (UDF_SB_LVIDBH(sb)) { - int write_rev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev); + if (sbi->s_lvid_bh) { + int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); if (write_rev > UDF_MAX_WRITE_VERSION) *flags |= MS_RDONLY; } @@ -538,17 +634,19 @@ static int udf_vrs(struct super_block *sb, int silent) int iso9660 = 0; int nsr02 = 0; int nsr03 = 0; + struct udf_sb_info *sbi; /* Block size must be a multiple of 512 */ if (sb->s_blocksize & 511) return 0; + sbi = UDF_SB(sb); if (sb->s_blocksize < sizeof(struct volStructDesc)) sectorsize = sizeof(struct volStructDesc); else sectorsize = sb->s_blocksize; - sector += (UDF_SB_SESSION(sb) << sb->s_blocksize_bits); + sector += (sbi->s_session << sb->s_blocksize_bits); 
udf_debug("Starting at sector %u (%ld byte sectors)\n", (sector >> sb->s_blocksize_bits), sb->s_blocksize); @@ -561,47 +659,52 @@ static int udf_vrs(struct super_block *sb, int silent) /* Look for ISO descriptors */ vsd = (struct volStructDesc *)(bh->b_data + - (sector & (sb->s_blocksize - 1))); + (sector & (sb->s_blocksize - 1))); if (vsd->stdIdent[0] == 0) { brelse(bh); break; - } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) { + } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, + VSD_STD_ID_LEN)) { iso9660 = sector; switch (vsd->structType) { case 0: udf_debug("ISO9660 Boot Record found\n"); break; case 1: - udf_debug - ("ISO9660 Primary Volume Descriptor found\n"); + udf_debug("ISO9660 Primary Volume Descriptor " + "found\n"); break; case 2: - udf_debug - ("ISO9660 Supplementary Volume Descriptor found\n"); + udf_debug("ISO9660 Supplementary Volume " + "Descriptor found\n"); break; case 3: - udf_debug - ("ISO9660 Volume Partition Descriptor found\n"); + udf_debug("ISO9660 Volume Partition Descriptor " + "found\n"); break; case 255: - udf_debug - ("ISO9660 Volume Descriptor Set Terminator found\n"); + udf_debug("ISO9660 Volume Descriptor Set " + "Terminator found\n"); break; default: udf_debug("ISO9660 VRS (%u) found\n", vsd->structType); break; } - } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BEA01, VSD_STD_ID_LEN)) { - } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01, VSD_STD_ID_LEN)) { + } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BEA01, + VSD_STD_ID_LEN)) + ; /* nothing */ + else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01, + VSD_STD_ID_LEN)) { brelse(bh); break; - } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN)) { + } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02, + VSD_STD_ID_LEN)) nsr02 = sector; - } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN)) { + else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03, + VSD_STD_ID_LEN)) nsr03 = sector; - } brelse(bh); } @@ -609,7 +712,7 @@ static 
int udf_vrs(struct super_block *sb, int silent) return nsr03; else if (nsr02) return nsr02; - else if (sector - (UDF_SB_SESSION(sb) << sb->s_blocksize_bits) == 32768) + else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768) return -1; else return 0; @@ -634,11 +737,15 @@ static int udf_vrs(struct super_block *sb, int silent) */ static void udf_find_anchor(struct super_block *sb) { - int lastblock = UDF_SB_LASTBLOCK(sb); + int lastblock; struct buffer_head *bh = NULL; uint16_t ident; uint32_t location; int i; + struct udf_sb_info *sbi; + + sbi = UDF_SB(sb); + lastblock = sbi->s_last_block; if (lastblock) { int varlastblock = udf_variable_to_fixed(lastblock); @@ -658,57 +765,83 @@ static void udf_find_anchor(struct super_block *sb) * however, if the disc isn't closed, it could be 512 */ for (i = 0; !lastblock && i < ARRAY_SIZE(last); i++) { - if (last[i] < 0 || !(bh = sb_bread(sb, last[i]))) { - ident = location = 0; - } else { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - brelse(bh); + ident = location = 0; + if (last[i] >= 0) { + bh = sb_bread(sb, last[i]); + if (bh) { + tag *t = (tag *)bh->b_data; + ident = le16_to_cpu(t->tagIdent); + location = le32_to_cpu(t->tagLocation); + brelse(bh); + } } if (ident == TAG_IDENT_AVDP) { - if (location == last[i] - UDF_SB_SESSION(sb)) { - lastblock = UDF_SB_ANCHOR(sb)[0] = last[i] - UDF_SB_SESSION(sb); - UDF_SB_ANCHOR(sb)[1] = last[i] - 256 - UDF_SB_SESSION(sb); - } else if (location == udf_variable_to_fixed(last[i]) - UDF_SB_SESSION(sb)) { + if (location == last[i] - sbi->s_session) { + lastblock = last[i] - sbi->s_session; + sbi->s_anchor[0] = lastblock; + sbi->s_anchor[1] = lastblock - 256; + } else if (location == + udf_variable_to_fixed(last[i]) - + sbi->s_session) { UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); - lastblock = UDF_SB_ANCHOR(sb)[0] = udf_variable_to_fixed(last[i]) - UDF_SB_SESSION(sb); - UDF_SB_ANCHOR(sb)[1] = lastblock - 256 - 
UDF_SB_SESSION(sb); + lastblock = + udf_variable_to_fixed(last[i]) - + sbi->s_session; + sbi->s_anchor[0] = lastblock; + sbi->s_anchor[1] = lastblock - 256 - + sbi->s_session; } else { - udf_debug("Anchor found at block %d, location mismatch %d.\n", + udf_debug("Anchor found at block %d, " + "location mismatch %d.\n", last[i], location); } - } else if (ident == TAG_IDENT_FE || ident == TAG_IDENT_EFE) { + } else if (ident == TAG_IDENT_FE || + ident == TAG_IDENT_EFE) { lastblock = last[i]; - UDF_SB_ANCHOR(sb)[3] = 512; + sbi->s_anchor[3] = 512; } else { - if (last[i] < 256 || !(bh = sb_bread(sb, last[i] - 256))) { - ident = location = 0; - } else { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - brelse(bh); + ident = location = 0; + if (last[i] >= 256) { + bh = sb_bread(sb, last[i] - 256); + if (bh) { + tag *t = (tag *)bh->b_data; + ident = le16_to_cpu( + t->tagIdent); + location = le32_to_cpu( + t->tagLocation); + brelse(bh); + } } if (ident == TAG_IDENT_AVDP && - location == last[i] - 256 - UDF_SB_SESSION(sb)) { + location == last[i] - 256 - + sbi->s_session) { lastblock = last[i]; - UDF_SB_ANCHOR(sb)[1] = last[i] - 256; + sbi->s_anchor[1] = last[i] - 256; } else { - if (last[i] < 312 + UDF_SB_SESSION(sb) || - !(bh = sb_bread(sb, last[i] - 312 - UDF_SB_SESSION(sb)))) { - ident = location = 0; - } else { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - brelse(bh); + ident = location = 0; + if (last[i] >= 312 + sbi->s_session) { + bh = sb_bread(sb, + last[i] - 312 - + sbi->s_session); + if (bh) { + tag *t = (tag *) + bh->b_data; + ident = le16_to_cpu( + t->tagIdent); + location = le32_to_cpu( + t->tagLocation); + brelse(bh); + } } if (ident == TAG_IDENT_AVDP && location == udf_variable_to_fixed(last[i]) - 256) { - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + UDF_SET_FLAG(sb, + UDF_FLAG_VARCONV); lastblock = udf_variable_to_fixed(last[i]); - 
UDF_SB_ANCHOR(sb)[1] = lastblock - 256; + sbi->s_anchor[1] = lastblock - 256; } } } @@ -716,10 +849,12 @@ static void udf_find_anchor(struct super_block *sb) } if (!lastblock) { - /* We havn't found the lastblock. check 312 */ - if ((bh = sb_bread(sb, 312 + UDF_SB_SESSION(sb)))) { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); + /* We haven't found the lastblock. check 312 */ + bh = sb_bread(sb, 312 + sbi->s_session); + if (bh) { + tag *t = (tag *)bh->b_data; + ident = le16_to_cpu(t->tagIdent); + location = le32_to_cpu(t->tagLocation); brelse(bh); if (ident == TAG_IDENT_AVDP && location == 256) @@ -727,29 +862,33 @@ static void udf_find_anchor(struct super_block *sb) } } - for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) { - if (UDF_SB_ANCHOR(sb)[i]) { - if (!(bh = udf_read_tagged(sb, UDF_SB_ANCHOR(sb)[i], - UDF_SB_ANCHOR(sb)[i], &ident))) { - UDF_SB_ANCHOR(sb)[i] = 0; - } else { + for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { + if (sbi->s_anchor[i]) { + bh = udf_read_tagged(sb, sbi->s_anchor[i], + sbi->s_anchor[i], &ident); + if (!bh) + sbi->s_anchor[i] = 0; + else { brelse(bh); if ((ident != TAG_IDENT_AVDP) && - (i || (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE))) { - UDF_SB_ANCHOR(sb)[i] = 0; - } + (i || (ident != TAG_IDENT_FE && + ident != TAG_IDENT_EFE))) + sbi->s_anchor[i] = 0; } } } - UDF_SB_LASTBLOCK(sb) = lastblock; + sbi->s_last_block = lastblock; } -static int udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr *root) +static int udf_find_fileset(struct super_block *sb, + kernel_lb_addr *fileset, + kernel_lb_addr *root) { struct buffer_head *bh = NULL; long lastblock; uint16_t ident; + struct udf_sb_info *sbi; if (fileset->logicalBlockNum != 0xFFFFFFFF || fileset->partitionReferenceNum != 0xFFFF) { @@ -764,22 +903,27 @@ static int udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, ker } - if (!bh) { /* Search backwards through the 
partitions */ + sbi = UDF_SB(sb); + if (!bh) { + /* Search backwards through the partitions */ kernel_lb_addr newfileset; /* --> cvg: FIXME - is it reasonable? */ return 1; - for (newfileset.partitionReferenceNum = UDF_SB_NUMPARTS(sb) - 1; + for (newfileset.partitionReferenceNum = sbi->s_partitions - 1; (newfileset.partitionReferenceNum != 0xFFFF && fileset->logicalBlockNum == 0xFFFFFFFF && fileset->partitionReferenceNum == 0xFFFF); newfileset.partitionReferenceNum--) { - lastblock = UDF_SB_PARTLEN(sb, newfileset.partitionReferenceNum); + lastblock = sbi->s_partmaps + [newfileset.partitionReferenceNum] + .s_partition_len; newfileset.logicalBlockNum = 0; do { - bh = udf_read_ptagged(sb, newfileset, 0, &ident); + bh = udf_read_ptagged(sb, newfileset, 0, + &ident); if (!bh) { newfileset.logicalBlockNum++; continue; @@ -789,11 +933,12 @@ static int udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, ker case TAG_IDENT_SBD: { struct spaceBitmapDesc *sp; - sp = (struct spaceBitmapDesc *)bh->b_data; + sp = (struct spaceBitmapDesc *) + bh->b_data; newfileset.logicalBlockNum += 1 + ((le32_to_cpu(sp->numOfBytes) + - sizeof(struct spaceBitmapDesc) - 1) - >> sb->s_blocksize_bits); + sizeof(struct spaceBitmapDesc) + - 1) >> sb->s_blocksize_bits); brelse(bh); break; } @@ -818,7 +963,7 @@ static int udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, ker fileset->logicalBlockNum, fileset->partitionReferenceNum); - UDF_SB_PARTITION(sb) = fileset->partitionReferenceNum; + sbi->s_partition = fileset->partitionReferenceNum; udf_load_fileset(sb, bh, root); brelse(bh); return 0; @@ -840,26 +985,26 @@ static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) lets_to_cpu(pvoldesc->recordingDateAndTime))) { kernel_timestamp ts; ts = lets_to_cpu(pvoldesc->recordingDateAndTime); - udf_debug("recording time %ld/%ld, %04u/%02u/%02u %02u:%02u (%x)\n", + udf_debug("recording time %ld/%ld, %04u/%02u/%02u" + " %02u:%02u (%x)\n", recording, 
recording_usec, ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.typeAndTimezone); - UDF_SB_RECORDTIME(sb).tv_sec = recording; - UDF_SB_RECORDTIME(sb).tv_nsec = recording_usec * 1000; + UDF_SB(sb)->s_record_time.tv_sec = recording; + UDF_SB(sb)->s_record_time.tv_nsec = recording_usec * 1000; } - if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) { + if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) if (udf_CS0toUTF8(&outstr, &instr)) { - strncpy(UDF_SB_VOLIDENT(sb), outstr.u_name, + strncpy(UDF_SB(sb)->s_volume_ident, outstr.u_name, outstr.u_len > 31 ? 31 : outstr.u_len); - udf_debug("volIdent[] = '%s'\n", UDF_SB_VOLIDENT(sb)); + udf_debug("volIdent[] = '%s'\n", + UDF_SB(sb)->s_volume_ident); } - } - if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) { + if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) if (udf_CS0toUTF8(&outstr, &instr)) udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); - } } static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, @@ -871,65 +1016,124 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, *root = lelb_to_cpu(fset->rootDirectoryICB.extLocation); - UDF_SB_SERIALNUM(sb) = le16_to_cpu(fset->descTag.tagSerialNum); + UDF_SB(sb)->s_serial_number = le16_to_cpu(fset->descTag.tagSerialNum); udf_debug("Rootdir at block=%d, partition=%d\n", root->logicalBlockNum, root->partitionReferenceNum); } +int udf_compute_nr_groups(struct super_block *sb, u32 partition) +{ + struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; + return (map->s_partition_len + + (sizeof(struct spaceBitmapDesc) << 3) + + (sb->s_blocksize * 8) - 1) / + (sb->s_blocksize * 8); +} + +static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) +{ + struct udf_bitmap *bitmap; + int nr_groups; + int size; + + nr_groups = udf_compute_nr_groups(sb, index); + size = sizeof(struct udf_bitmap) + + (sizeof(struct buffer_head *) * nr_groups); + + if (size <= PAGE_SIZE) + bitmap = 
kmalloc(size, GFP_KERNEL); + else + bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ + + if (bitmap == NULL) { + udf_error(sb, __FUNCTION__, + "Unable to allocate space for bitmap " + "and %d buffer_head pointers", nr_groups); + return NULL; + } + + memset(bitmap, 0x00, size); + bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1); + bitmap->s_nr_groups = nr_groups; + return bitmap; +} + static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) { struct partitionDesc *p; int i; + struct udf_part_map *map; + struct udf_sb_info *sbi; p = (struct partitionDesc *)bh->b_data; + sbi = UDF_SB(sb); - for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) { + for (i = 0; i < sbi->s_partitions; i++) { + map = &sbi->s_partmaps[i]; udf_debug("Searching map: (%d == %d)\n", - UDF_SB_PARTMAPS(sb)[i].s_partition_num, le16_to_cpu(p->partitionNumber)); - if (UDF_SB_PARTMAPS(sb)[i].s_partition_num == le16_to_cpu(p->partitionNumber)) { - UDF_SB_PARTLEN(sb,i) = le32_to_cpu(p->partitionLength); /* blocks */ - UDF_SB_PARTROOT(sb,i) = le32_to_cpu(p->partitionStartingLocation); - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_READ_ONLY) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_READ_ONLY; - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_WRITE_ONCE) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_WRITE_ONCE; - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_REWRITABLE) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_REWRITABLE; - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_OVERWRITABLE) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_OVERWRITABLE; - - if (!strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) || - !strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) { + map->s_partition_num, + le16_to_cpu(p->partitionNumber)); + if (map->s_partition_num == + le16_to_cpu(p->partitionNumber)) { + map->s_partition_len = + le32_to_cpu(p->partitionLength); /* blocks */ + map->s_partition_root = + le32_to_cpu(p->partitionStartingLocation); + if (p->accessType == 
+ cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) + map->s_partition_flags |= + UDF_PART_FLAG_READ_ONLY; + if (p->accessType == + cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE)) + map->s_partition_flags |= + UDF_PART_FLAG_WRITE_ONCE; + if (p->accessType == + cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE)) + map->s_partition_flags |= + UDF_PART_FLAG_REWRITABLE; + if (p->accessType == + cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) + map->s_partition_flags |= + UDF_PART_FLAG_OVERWRITABLE; + + if (!strcmp(p->partitionContents.ident, + PD_PARTITION_CONTENTS_NSR02) || + !strcmp(p->partitionContents.ident, + PD_PARTITION_CONTENTS_NSR03)) { struct partitionHeaderDesc *phd; - phd = (struct partitionHeaderDesc *)(p->partitionContentsUse); + phd = (struct partitionHeaderDesc *) + (p->partitionContentsUse); if (phd->unallocSpaceTable.extLength) { kernel_lb_addr loc = { .logicalBlockNum = le32_to_cpu(phd->unallocSpaceTable.extPosition), .partitionReferenceNum = i, }; - UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table = + map->s_uspace.s_table = udf_iget(sb, loc); - if (!UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table) { + if (!map->s_uspace.s_table) { udf_debug("cannot load unallocSpaceTable (part %d)\n", i); return 1; } - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_TABLE; + map->s_partition_flags |= + UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %ld\n", - i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table->i_ino); + i, map->s_uspace.s_table->i_ino); } if (phd->unallocSpaceBitmap.extLength) { - UDF_SB_ALLOC_BITMAP(sb, i, s_uspace); - if (UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap != NULL) { - UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extLength = + struct udf_bitmap *bitmap = + udf_sb_alloc_bitmap(sb, i); + map->s_uspace.s_bitmap = bitmap; + if (bitmap != NULL) { + bitmap->s_extLength = le32_to_cpu(phd->unallocSpaceBitmap.extLength); - UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extPosition = + bitmap->s_extPosition = le32_to_cpu(phd->unallocSpaceBitmap.extPosition); - UDF_SB_PARTFLAGS(sb,i) |= 
UDF_PART_FLAG_UNALLOC_BITMAP; + map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; udf_debug("unallocSpaceBitmap (part %d) @ %d\n", - i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extPosition); + i, bitmap->s_extPosition); } } if (phd->partitionIntegrityTable.extLength) @@ -940,40 +1144,45 @@ static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) .partitionReferenceNum = i, }; - UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table = + map->s_fspace.s_table = udf_iget(sb, loc); - if (!UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table) { + if (!map->s_fspace.s_table) { udf_debug("cannot load freedSpaceTable (part %d)\n", i); return 1; } - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_TABLE; + map->s_partition_flags |= + UDF_PART_FLAG_FREED_TABLE; udf_debug("freedSpaceTable (part %d) @ %ld\n", - i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table->i_ino); + i, map->s_fspace.s_table->i_ino); } if (phd->freedSpaceBitmap.extLength) { - UDF_SB_ALLOC_BITMAP(sb, i, s_fspace); - if (UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap != NULL) { - UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extLength = + struct udf_bitmap *bitmap = + udf_sb_alloc_bitmap(sb, i); + map->s_fspace.s_bitmap = bitmap; + if (bitmap != NULL) { + bitmap->s_extLength = le32_to_cpu(phd->freedSpaceBitmap.extLength); - UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extPosition = + bitmap->s_extPosition = le32_to_cpu(phd->freedSpaceBitmap.extPosition); - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_BITMAP; + map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; udf_debug("freedSpaceBitmap (part %d) @ %d\n", - i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extPosition); + i, bitmap->s_extPosition); } } } break; } } - if (i == UDF_SB_NUMPARTS(sb)) { + if (i == sbi->s_partitions) udf_debug("Partition (%d) not found in partition map\n", le16_to_cpu(p->partitionNumber)); - } else { - udf_debug("Partition (%d:%d type %x) starts at physical %d, block length %d\n", - le16_to_cpu(p->partitionNumber), i, UDF_SB_PARTTYPE(sb,i), - 
UDF_SB_PARTROOT(sb,i), UDF_SB_PARTLEN(sb,i)); - } + else + udf_debug("Partition (%d:%d type %x) starts at physical %d, " + "block length %d\n", + le16_to_cpu(p->partitionNumber), i, + map->s_partition_type, + map->s_partition_root, + map->s_partition_len); return 0; } @@ -983,70 +1192,105 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, struct logicalVolDesc *lvd; int i, j, offset; uint8_t type; + struct udf_sb_info *sbi = UDF_SB(sb); + struct genericPartitionMap *gpm; lvd = (struct logicalVolDesc *)bh->b_data; - UDF_SB_ALLOC_PARTMAPS(sb, le32_to_cpu(lvd->numPartitionMaps)); + i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); + if (i != 0) + return i; for (i = 0, offset = 0; - i < UDF_SB_NUMPARTS(sb) && offset < le32_to_cpu(lvd->mapTableLength); - i++, offset += ((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))->partitionMapLength) { - type = ((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))->partitionMapType; + i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); + i++, offset += gpm->partitionMapLength) { + struct udf_part_map *map = &sbi->s_partmaps[i]; + gpm = (struct genericPartitionMap *) + &(lvd->partitionMaps[offset]); + type = gpm->partitionMapType; if (type == 1) { - struct genericPartitionMap1 *gpm1 = (struct genericPartitionMap1 *)&(lvd->partitionMaps[offset]); - UDF_SB_PARTTYPE(sb,i) = UDF_TYPE1_MAP15; - UDF_SB_PARTVSN(sb,i) = le16_to_cpu(gpm1->volSeqNum); - UDF_SB_PARTNUM(sb,i) = le16_to_cpu(gpm1->partitionNum); - UDF_SB_PARTFUNC(sb,i) = NULL; + struct genericPartitionMap1 *gpm1 = + (struct genericPartitionMap1 *)gpm; + map->s_partition_type = UDF_TYPE1_MAP15; + map->s_volumeseqnum = le16_to_cpu(gpm1->volSeqNum); + map->s_partition_num = le16_to_cpu(gpm1->partitionNum); + map->s_partition_func = NULL; } else if (type == 2) { - struct udfPartitionMap2 *upm2 = (struct udfPartitionMap2 *)&(lvd->partitionMaps[offset]); - if (!strncmp(upm2->partIdent.ident, 
UDF_ID_VIRTUAL, strlen(UDF_ID_VIRTUAL))) { - if (le16_to_cpu(((__le16 *)upm2->partIdent.identSuffix)[0]) == 0x0150) { - UDF_SB_PARTTYPE(sb,i) = UDF_VIRTUAL_MAP15; - UDF_SB_PARTFUNC(sb,i) = udf_get_pblock_virt15; - } else if (le16_to_cpu(((__le16 *)upm2->partIdent.identSuffix)[0]) == 0x0200) { - UDF_SB_PARTTYPE(sb,i) = UDF_VIRTUAL_MAP20; - UDF_SB_PARTFUNC(sb,i) = udf_get_pblock_virt20; + struct udfPartitionMap2 *upm2 = + (struct udfPartitionMap2 *)gpm; + if (!strncmp(upm2->partIdent.ident, UDF_ID_VIRTUAL, + strlen(UDF_ID_VIRTUAL))) { + u16 suf = + le16_to_cpu(((__le16 *)upm2->partIdent. + identSuffix)[0]); + if (suf == 0x0150) { + map->s_partition_type = + UDF_VIRTUAL_MAP15; + map->s_partition_func = + udf_get_pblock_virt15; + } else if (suf == 0x0200) { + map->s_partition_type = + UDF_VIRTUAL_MAP20; + map->s_partition_func = + udf_get_pblock_virt20; } - } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_SPARABLE, + strlen(UDF_ID_SPARABLE))) { uint32_t loc; uint16_t ident; struct sparingTable *st; - struct sparablePartitionMap *spm = (struct sparablePartitionMap *)&(lvd->partitionMaps[offset]); + struct sparablePartitionMap *spm = + (struct sparablePartitionMap *)gpm; - UDF_SB_PARTTYPE(sb,i) = UDF_SPARABLE_MAP15; - UDF_SB_TYPESPAR(sb,i).s_packet_len = le16_to_cpu(spm->packetLength); + map->s_partition_type = UDF_SPARABLE_MAP15; + map->s_type_specific.s_sparing.s_packet_len = + le16_to_cpu(spm->packetLength); for (j = 0; j < spm->numSparingTables; j++) { - loc = le32_to_cpu(spm->locSparingTable[j]); - UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = - udf_read_tagged(sb, loc, loc, &ident); - if (UDF_SB_TYPESPAR(sb,i).s_spar_map[j] != NULL) { - st = (struct sparingTable *)UDF_SB_TYPESPAR(sb,i).s_spar_map[j]->b_data; - if (ident != 0 || - strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING))) { - brelse(UDF_SB_TYPESPAR(sb,i).s_spar_map[j]); - 
UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = NULL; + struct buffer_head *bh2; + + loc = le32_to_cpu( + spm->locSparingTable[j]); + bh2 = udf_read_tagged(sb, loc, loc, + &ident); + map->s_type_specific.s_sparing. + s_spar_map[j] = bh2; + + if (bh2 != NULL) { + st = (struct sparingTable *) + bh2->b_data; + if (ident != 0 || strncmp( + st->sparingIdent.ident, + UDF_ID_SPARING, + strlen(UDF_ID_SPARING))) { + brelse(bh2); + map->s_type_specific. + s_sparing. + s_spar_map[j] = + NULL; } } } - UDF_SB_PARTFUNC(sb,i) = udf_get_pblock_spar15; + map->s_partition_func = udf_get_pblock_spar15; } else { - udf_debug("Unknown ident: %s\n", upm2->partIdent.ident); + udf_debug("Unknown ident: %s\n", + upm2->partIdent.ident); continue; } - UDF_SB_PARTVSN(sb,i) = le16_to_cpu(upm2->volSeqNum); - UDF_SB_PARTNUM(sb,i) = le16_to_cpu(upm2->partitionNum); + map->s_volumeseqnum = le16_to_cpu(upm2->volSeqNum); + map->s_partition_num = le16_to_cpu(upm2->partitionNum); } udf_debug("Partition (%d:%d) type %d on volume %d\n", - i, UDF_SB_PARTNUM(sb,i), type, UDF_SB_PARTVSN(sb,i)); + i, map->s_partition_num, type, + map->s_volumeseqnum); } if (fileset) { long_ad *la = (long_ad *)&(lvd->logicalVolContentsUse[0]); *fileset = lelb_to_cpu(la->extLocation); - udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n", - fileset->logicalBlockNum, + udf_debug("FileSet found in LogicalVolDesc at block=%d, " + "partition=%d\n", fileset->logicalBlockNum, fileset->partitionReferenceNum); } if (lvd->integritySeqExt.extLength) @@ -1063,22 +1307,26 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) { struct buffer_head *bh = NULL; uint16_t ident; + struct udf_sb_info *sbi = UDF_SB(sb); + struct logicalVolIntegrityDesc *lvid; while (loc.extLength > 0 && (bh = udf_read_tagged(sb, loc.extLocation, loc.extLocation, &ident)) && ident == TAG_IDENT_LVID) { - UDF_SB_LVIDBH(sb) = bh; + sbi->s_lvid_bh = bh; + lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - if 
(UDF_SB_LVID(sb)->nextIntegrityExt.extLength) - udf_load_logicalvolint(sb, leea_to_cpu(UDF_SB_LVID(sb)->nextIntegrityExt)); + if (lvid->nextIntegrityExt.extLength) + udf_load_logicalvolint(sb, + leea_to_cpu(lvid->nextIntegrityExt)); - if (UDF_SB_LVIDBH(sb) != bh) + if (sbi->s_lvid_bh != bh) brelse(bh); loc.extLength -= sb->s_blocksize; loc.extLocation++; } - if (UDF_SB_LVIDBH(sb) != bh) + if (sbi->s_lvid_bh != bh) brelse(bh); } @@ -1097,11 +1345,12 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static int udf_process_sequence(struct super_block *sb, long block, long lastblock, - kernel_lb_addr *fileset) +static int udf_process_sequence(struct super_block *sb, long block, + long lastblock, kernel_lb_addr *fileset) { struct buffer_head *bh = NULL; struct udf_vds_record vds[VDS_POS_LENGTH]; + struct udf_vds_record *curr; struct generic_desc *gd; struct volDescPtr *vdp; int done = 0; @@ -1124,43 +1373,51 @@ static int udf_process_sequence(struct super_block *sb, long block, long lastblo vdsn = le32_to_cpu(gd->volDescSeqNum); switch (ident) { case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ - if (vdsn >= vds[VDS_POS_PRIMARY_VOL_DESC].volDescSeqNum) { - vds[VDS_POS_PRIMARY_VOL_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_PRIMARY_VOL_DESC].block = block; + curr = &vds[VDS_POS_PRIMARY_VOL_DESC]; + if (vdsn >= curr->volDescSeqNum) { + curr->volDescSeqNum = vdsn; + curr->block = block; } break; case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ - if (vdsn >= vds[VDS_POS_VOL_DESC_PTR].volDescSeqNum) { - vds[VDS_POS_VOL_DESC_PTR].volDescSeqNum = vdsn; - vds[VDS_POS_VOL_DESC_PTR].block = block; + curr = &vds[VDS_POS_VOL_DESC_PTR]; + if (vdsn >= curr->volDescSeqNum) { + curr->volDescSeqNum = vdsn; + curr->block = block; vdp = (struct volDescPtr *)bh->b_data; - next_s = le32_to_cpu(vdp->nextVolDescSeqExt.extLocation); - next_e = le32_to_cpu(vdp->nextVolDescSeqExt.extLength); + next_s = 
le32_to_cpu( + vdp->nextVolDescSeqExt.extLocation); + next_e = le32_to_cpu( + vdp->nextVolDescSeqExt.extLength); next_e = next_e >> sb->s_blocksize_bits; next_e += next_s; } break; case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ - if (vdsn >= vds[VDS_POS_IMP_USE_VOL_DESC].volDescSeqNum) { - vds[VDS_POS_IMP_USE_VOL_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_IMP_USE_VOL_DESC].block = block; + curr = &vds[VDS_POS_IMP_USE_VOL_DESC]; + if (vdsn >= curr->volDescSeqNum) { + curr->volDescSeqNum = vdsn; + curr->block = block; } break; case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ - if (!vds[VDS_POS_PARTITION_DESC].block) - vds[VDS_POS_PARTITION_DESC].block = block; + curr = &vds[VDS_POS_PARTITION_DESC]; + if (!curr->block) + curr->block = block; break; case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ - if (vdsn >= vds[VDS_POS_LOGICAL_VOL_DESC].volDescSeqNum) { - vds[VDS_POS_LOGICAL_VOL_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_LOGICAL_VOL_DESC].block = block; + curr = &vds[VDS_POS_LOGICAL_VOL_DESC]; + if (vdsn >= curr->volDescSeqNum) { + curr->volDescSeqNum = vdsn; + curr->block = block; } break; case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ - if (vdsn >= vds[VDS_POS_UNALLOC_SPACE_DESC].volDescSeqNum) { - vds[VDS_POS_UNALLOC_SPACE_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_UNALLOC_SPACE_DESC].block = block; + curr = &vds[VDS_POS_UNALLOC_SPACE_DESC]; + if (vdsn >= curr->volDescSeqNum) { + curr->volDescSeqNum = vdsn; + curr->block = block; } break; case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ @@ -1169,32 +1426,38 @@ static int udf_process_sequence(struct super_block *sb, long block, long lastblo block = next_s; lastblock = next_e; next_s = next_e = 0; - } else { + } else done = 1; - } break; } brelse(bh); } for (i = 0; i < VDS_POS_LENGTH; i++) { if (vds[i].block) { - bh = udf_read_tagged(sb, vds[i].block, vds[i].block, &ident); + bh = udf_read_tagged(sb, vds[i].block, vds[i].block, + &ident); if (i == VDS_POS_PRIMARY_VOL_DESC) { udf_load_pvoldesc(sb, bh); } else if (i == VDS_POS_LOGICAL_VOL_DESC) 
{ - udf_load_logicalvol(sb, bh, fileset); + if (udf_load_logicalvol(sb, bh, fileset)) { + brelse(bh); + return 1; + } } else if (i == VDS_POS_PARTITION_DESC) { struct buffer_head *bh2 = NULL; if (udf_load_partdesc(sb, bh)) { brelse(bh); return 1; } - for (j = vds[i].block + 1; j < vds[VDS_POS_TERMINATING_DESC].block; j++) { + for (j = vds[i].block + 1; + j < vds[VDS_POS_TERMINATING_DESC].block; + j++) { bh2 = udf_read_tagged(sb, j, j, &ident); gd = (struct generic_desc *)bh2->b_data; if (ident == TAG_IDENT_PD) - if (udf_load_partdesc(sb, bh2)) { + if (udf_load_partdesc(sb, + bh2)) { brelse(bh); brelse(bh2); return 1; @@ -1222,14 +1485,17 @@ static int udf_check_valid(struct super_block *sb, int novrs, int silent) } /* Check that it is NSR02 compliant */ /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ - else if ((block = udf_vrs(sb, silent)) == -1) { - udf_debug("Failed to read byte 32768. Assuming open disc. " - "Skipping validity check\n"); - if (!UDF_SB_LASTBLOCK(sb)) - UDF_SB_LASTBLOCK(sb) = udf_get_last_block(sb); - return 0; - } else { - return !block; + else { + block = udf_vrs(sb, silent); + if (block == -1) { + struct udf_sb_info *sbi = UDF_SB(sb); + udf_debug("Failed to read byte 32768. Assuming open " + "disc. 
Skipping validity check\n"); + if (!sbi->s_last_block) + sbi->s_last_block = udf_get_last_block(sb); + return 0; + } else + return !block; } } @@ -1240,100 +1506,121 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) struct buffer_head *bh; long main_s, main_e, reserve_s, reserve_e; int i, j; + struct udf_sb_info *sbi; if (!sb) return 1; + sbi = UDF_SB(sb); - for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) { - if (UDF_SB_ANCHOR(sb)[i] && - (bh = udf_read_tagged(sb, UDF_SB_ANCHOR(sb)[i], - UDF_SB_ANCHOR(sb)[i], &ident))) { - anchor = (struct anchorVolDescPtr *)bh->b_data; + for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { + if (!sbi->s_anchor[i]) + continue; + bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i], + &ident); + if (!bh) + continue; - /* Locate the main sequence */ - main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); - main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength ); - main_e = main_e >> sb->s_blocksize_bits; - main_e += main_s; + anchor = (struct anchorVolDescPtr *)bh->b_data; - /* Locate the reserve sequence */ - reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); - reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); - reserve_e = reserve_e >> sb->s_blocksize_bits; - reserve_e += reserve_s; + /* Locate the main sequence */ + main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); + main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); + main_e = main_e >> sb->s_blocksize_bits; + main_e += main_s; - brelse(bh); + /* Locate the reserve sequence */ + reserve_s = le32_to_cpu( + anchor->reserveVolDescSeqExt.extLocation); + reserve_e = le32_to_cpu( + anchor->reserveVolDescSeqExt.extLength); + reserve_e = reserve_e >> sb->s_blocksize_bits; + reserve_e += reserve_s; - /* Process the main & reserve sequences */ - /* responsible for finding the PartitionDesc(s) */ - if (!(udf_process_sequence(sb, main_s, main_e, fileset) && - udf_process_sequence(sb, 
reserve_s, reserve_e, fileset))) { - break; - } - } + brelse(bh); + + /* Process the main & reserve sequences */ + /* responsible for finding the PartitionDesc(s) */ + if (!(udf_process_sequence(sb, main_s, main_e, + fileset) && + udf_process_sequence(sb, reserve_s, reserve_e, + fileset))) + break; } - if (i == ARRAY_SIZE(UDF_SB_ANCHOR(sb))) { + if (i == ARRAY_SIZE(sbi->s_anchor)) { udf_debug("No Anchor block found\n"); return 1; - } else - udf_debug("Using anchor in block %d\n", UDF_SB_ANCHOR(sb)[i]); + } + udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]); - for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) { + for (i = 0; i < sbi->s_partitions; i++) { kernel_lb_addr uninitialized_var(ino); - switch (UDF_SB_PARTTYPE(sb, i)) { + struct udf_part_map *map = &sbi->s_partmaps[i]; + switch (map->s_partition_type) { case UDF_VIRTUAL_MAP15: case UDF_VIRTUAL_MAP20: - if (!UDF_SB_LASTBLOCK(sb)) { - UDF_SB_LASTBLOCK(sb) = udf_get_last_block(sb); + if (!sbi->s_last_block) { + sbi->s_last_block = udf_get_last_block(sb); udf_find_anchor(sb); } - if (!UDF_SB_LASTBLOCK(sb)) { + if (!sbi->s_last_block) { udf_debug("Unable to determine Lastblock (For " "Virtual Partition)\n"); return 1; } - for (j = 0; j < UDF_SB_NUMPARTS(sb); j++) { + for (j = 0; j < sbi->s_partitions; j++) { + struct udf_part_map *map2 = &sbi->s_partmaps[j]; if (j != i && - UDF_SB_PARTVSN(sb, i) == UDF_SB_PARTVSN(sb, j) && - UDF_SB_PARTNUM(sb, i) == UDF_SB_PARTNUM(sb, j)) { + map->s_volumeseqnum == + map2->s_volumeseqnum && + map->s_partition_num == + map2->s_partition_num) { ino.partitionReferenceNum = j; - ino.logicalBlockNum = UDF_SB_LASTBLOCK(sb) - UDF_SB_PARTROOT(sb, j); + ino.logicalBlockNum = + sbi->s_last_block - + map2->s_partition_root; break; } } - if (j == UDF_SB_NUMPARTS(sb)) + if (j == sbi->s_partitions) return 1; - if (!(UDF_SB_VAT(sb) = udf_iget(sb, ino))) + sbi->s_vat_inode = udf_iget(sb, ino); + if (!sbi->s_vat_inode) return 1; - if (UDF_SB_PARTTYPE(sb, i) == UDF_VIRTUAL_MAP15) { - 
UDF_SB_TYPEVIRT(sb, i).s_start_offset = - udf_ext0_offset(UDF_SB_VAT(sb)); - UDF_SB_TYPEVIRT(sb, i).s_num_entries = - (UDF_SB_VAT(sb)->i_size - 36) >> 2; - } else if (UDF_SB_PARTTYPE(sb, i) == UDF_VIRTUAL_MAP20) { - struct buffer_head *bh = NULL; + if (map->s_partition_type == UDF_VIRTUAL_MAP15) { + map->s_type_specific.s_virtual.s_start_offset = + udf_ext0_offset(sbi->s_vat_inode); + map->s_type_specific.s_virtual.s_num_entries = + (sbi->s_vat_inode->i_size - 36) >> 2; + } else if (map->s_partition_type == UDF_VIRTUAL_MAP20) { uint32_t pos; + struct virtualAllocationTable20 *vat20; - pos = udf_block_map(UDF_SB_VAT(sb), 0); + pos = udf_block_map(sbi->s_vat_inode, 0); bh = sb_bread(sb, pos); if (!bh) return 1; - UDF_SB_TYPEVIRT(sb, i).s_start_offset = - le16_to_cpu(((struct virtualAllocationTable20 *)bh->b_data + - udf_ext0_offset(UDF_SB_VAT(sb)))->lengthHeader) + - udf_ext0_offset(UDF_SB_VAT(sb)); - UDF_SB_TYPEVIRT(sb, i).s_num_entries = (UDF_SB_VAT(sb)->i_size - - UDF_SB_TYPEVIRT(sb, i).s_start_offset) >> 2; + vat20 = (struct virtualAllocationTable20 *) + bh->b_data + + udf_ext0_offset(sbi->s_vat_inode); + map->s_type_specific.s_virtual.s_start_offset = + le16_to_cpu(vat20->lengthHeader) + + udf_ext0_offset(sbi->s_vat_inode); + map->s_type_specific.s_virtual.s_num_entries = + (sbi->s_vat_inode->i_size - + map->s_type_specific.s_virtual. + s_start_offset) >> 2; brelse(bh); } - UDF_SB_PARTROOT(sb, i) = udf_get_pblock(sb, 0, i, 0); - UDF_SB_PARTLEN(sb, i) = UDF_SB_PARTLEN(sb, ino.partitionReferenceNum); + map->s_partition_root = udf_get_pblock(sb, 0, i, 0); + map->s_partition_len = + sbi->s_partmaps[ino.partitionReferenceNum]. 
+ s_partition_len; } } return 0; @@ -1341,62 +1628,86 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) static void udf_open_lvid(struct super_block *sb) { - if (UDF_SB_LVIDBH(sb)) { - int i; + struct udf_sb_info *sbi = UDF_SB(sb); + struct buffer_head *bh = sbi->s_lvid_bh; + if (bh) { kernel_timestamp cpu_time; + struct logicalVolIntegrityDesc *lvid = + (struct logicalVolIntegrityDesc *)bh->b_data; + struct logicalVolIntegrityDescImpUse *lvidiu = + udf_sb_lvidiu(sbi); - UDF_SB_LVIDIU(sb)->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; - UDF_SB_LVIDIU(sb)->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; + lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; + lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) - UDF_SB_LVID(sb)->recordingDateAndTime = cpu_to_lets(cpu_time); - UDF_SB_LVID(sb)->integrityType = LVID_INTEGRITY_TYPE_OPEN; - - UDF_SB_LVID(sb)->descTag.descCRC = cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag), - le16_to_cpu(UDF_SB_LVID(sb)->descTag.descCRCLength), 0)); + lvid->recordingDateAndTime = cpu_to_lets(cpu_time); + lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; - UDF_SB_LVID(sb)->descTag.tagChecksum = 0; - for (i = 0; i < 16; i++) - if (i != 4) - UDF_SB_LVID(sb)->descTag.tagChecksum += - ((uint8_t *) &(UDF_SB_LVID(sb)->descTag))[i]; + lvid->descTag.descCRC = cpu_to_le16( + udf_crc((char *)lvid + sizeof(tag), + le16_to_cpu(lvid->descTag.descCRCLength), + 0)); - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); + lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); + mark_buffer_dirty(bh); } } static void udf_close_lvid(struct super_block *sb) { kernel_timestamp cpu_time; - int i; + struct udf_sb_info *sbi = UDF_SB(sb); + struct buffer_head *bh = sbi->s_lvid_bh; + struct logicalVolIntegrityDesc *lvid; + + if (!bh) + return; + + lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - if (UDF_SB_LVIDBH(sb) && - UDF_SB_LVID(sb)->integrityType == LVID_INTEGRITY_TYPE_OPEN) { 
- UDF_SB_LVIDIU(sb)->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; - UDF_SB_LVIDIU(sb)->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; + if (lvid->integrityType == LVID_INTEGRITY_TYPE_OPEN) { + struct logicalVolIntegrityDescImpUse *lvidiu = + udf_sb_lvidiu(sbi); + lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; + lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) - UDF_SB_LVID(sb)->recordingDateAndTime = cpu_to_lets(cpu_time); - if (UDF_MAX_WRITE_VERSION > le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev)) - UDF_SB_LVIDIU(sb)->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION); - if (UDF_SB_UDFREV(sb) > le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev)) - UDF_SB_LVIDIU(sb)->minUDFReadRev = cpu_to_le16(UDF_SB_UDFREV(sb)); - if (UDF_SB_UDFREV(sb) > le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev)) - UDF_SB_LVIDIU(sb)->minUDFWriteRev = cpu_to_le16(UDF_SB_UDFREV(sb)); - UDF_SB_LVID(sb)->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); - - UDF_SB_LVID(sb)->descTag.descCRC = - cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag), - le16_to_cpu(UDF_SB_LVID(sb)->descTag.descCRCLength), 0)); - - UDF_SB_LVID(sb)->descTag.tagChecksum = 0; - for (i = 0; i < 16; i++) - if (i != 4) - UDF_SB_LVID(sb)->descTag.tagChecksum += - ((uint8_t *)&(UDF_SB_LVID(sb)->descTag))[i]; - - mark_buffer_dirty(UDF_SB_LVIDBH(sb)); + lvid->recordingDateAndTime = cpu_to_lets(cpu_time); + if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) + lvidiu->maxUDFWriteRev = + cpu_to_le16(UDF_MAX_WRITE_VERSION); + if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev)) + lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); + if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) + lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); + lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); + + lvid->descTag.descCRC = cpu_to_le16( + udf_crc((char *)lvid + sizeof(tag), + le16_to_cpu(lvid->descTag.descCRCLength), + 0)); + + 
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); + mark_buffer_dirty(bh); } } +static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) +{ + int i; + int nr_groups = bitmap->s_nr_groups; + int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) * + nr_groups); + + for (i = 0; i < nr_groups; i++) + if (bitmap->s_block_bitmap[i]) + brelse(bitmap->s_block_bitmap[i]); + + if (size <= PAGE_SIZE) + kfree(bitmap); + else + vfree(bitmap); +} + /* * udf_read_super * @@ -1426,16 +1737,15 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) uopt.gid = -1; uopt.umask = 0; - sbi = kmalloc(sizeof(struct udf_sb_info), GFP_KERNEL); + sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; - memset(UDF_SB(sb), 0x00, sizeof(struct udf_sb_info)); mutex_init(&sbi->s_alloc_mutex); - if (!udf_parse_options((char *)options, &uopt)) + if (!udf_parse_options((char *)options, &uopt, false)) goto error_out; if (uopt.flags & (1 << UDF_FLAG_UTF8) && @@ -1459,30 +1769,31 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) fileset.logicalBlockNum = 0xFFFFFFFF; fileset.partitionReferenceNum = 0xFFFF; - UDF_SB(sb)->s_flags = uopt.flags; - UDF_SB(sb)->s_uid = uopt.uid; - UDF_SB(sb)->s_gid = uopt.gid; - UDF_SB(sb)->s_umask = uopt.umask; - UDF_SB(sb)->s_nls_map = uopt.nls_map; + sbi->s_flags = uopt.flags; + sbi->s_uid = uopt.uid; + sbi->s_gid = uopt.gid; + sbi->s_umask = uopt.umask; + sbi->s_nls_map = uopt.nls_map; /* Set the block size for all transfers */ if (!udf_set_blocksize(sb, uopt.blocksize)) goto error_out; if (uopt.session == 0xFFFFFFFF) - UDF_SB_SESSION(sb) = udf_get_last_session(sb); + sbi->s_session = udf_get_last_session(sb); else - UDF_SB_SESSION(sb) = uopt.session; + sbi->s_session = uopt.session; - udf_debug("Multi-session=%d\n", UDF_SB_SESSION(sb)); + udf_debug("Multi-session=%d\n", sbi->s_session); - UDF_SB_LASTBLOCK(sb) = uopt.lastblock; - 
UDF_SB_ANCHOR(sb)[0] = UDF_SB_ANCHOR(sb)[1] = 0; - UDF_SB_ANCHOR(sb)[2] = uopt.anchor; - UDF_SB_ANCHOR(sb)[3] = 256; + sbi->s_last_block = uopt.lastblock; + sbi->s_anchor[0] = sbi->s_anchor[1] = 0; + sbi->s_anchor[2] = uopt.anchor; + sbi->s_anchor[3] = 256; - if (udf_check_valid(sb, uopt.novrs, silent)) { /* read volume recognition sequences */ - printk("UDF-fs: No VRS found\n"); + if (udf_check_valid(sb, uopt.novrs, silent)) { + /* read volume recognition sequences */ + printk(KERN_WARNING "UDF-fs: No VRS found\n"); goto error_out; } @@ -1496,27 +1807,30 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_time_gran = 1000; if (udf_load_partition(sb, &fileset)) { - printk("UDF-fs: No partition found (1)\n"); + printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); goto error_out; } - udf_debug("Lastblock=%d\n", UDF_SB_LASTBLOCK(sb)); + udf_debug("Lastblock=%d\n", sbi->s_last_block); - if (UDF_SB_LVIDBH(sb)) { - uint16_t minUDFReadRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev); - uint16_t minUDFWriteRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev); - /* uint16_t maxUDFWriteRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev); */ + if (sbi->s_lvid_bh) { + struct logicalVolIntegrityDescImpUse *lvidiu = + udf_sb_lvidiu(sbi); + uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); + uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); + /* uint16_t maxUDFWriteRev = + le16_to_cpu(lvidiu->maxUDFWriteRev); */ if (minUDFReadRev > UDF_MAX_READ_VERSION) { - printk("UDF-fs: minUDFReadRev=%x (max is %x)\n", - le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev), + printk(KERN_ERR "UDF-fs: minUDFReadRev=%x " + "(max is %x)\n", + le16_to_cpu(lvidiu->minUDFReadRev), UDF_MAX_READ_VERSION); goto error_out; - } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) { + } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) sb->s_flags |= MS_RDONLY; - } - UDF_SB_UDFREV(sb) = minUDFWriteRev; + sbi->s_udfrev = minUDFWriteRev; if 
(minUDFReadRev >= UDF_VERS_USE_EXTENDED_FE) UDF_SET_FLAG(sb, UDF_FLAG_USE_EXTENDED_FE); @@ -1524,29 +1838,30 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) UDF_SET_FLAG(sb, UDF_FLAG_USE_STREAMS); } - if (!UDF_SB_NUMPARTS(sb)) { - printk("UDF-fs: No partition found (2)\n"); + if (!sbi->s_partitions) { + printk(KERN_WARNING "UDF-fs: No partition found (2)\n"); goto error_out; } - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY) { - printk("UDF-fs: Partition marked readonly; forcing readonly mount\n"); + if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & + UDF_PART_FLAG_READ_ONLY) { + printk(KERN_NOTICE "UDF-fs: Partition marked readonly; " + "forcing readonly mount\n"); sb->s_flags |= MS_RDONLY; } if (udf_find_fileset(sb, &fileset, &rootdir)) { - printk("UDF-fs: No fileset found\n"); + printk(KERN_WARNING "UDF-fs: No fileset found\n"); goto error_out; } if (!silent) { kernel_timestamp ts; - udf_time_to_stamp(&ts, UDF_SB_RECORDTIME(sb)); - udf_info("UDF %s (%s) Mounting volume '%s', " + udf_time_to_stamp(&ts, sbi->s_record_time); + udf_info("UDF: Mounting volume '%s', " "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", - UDFFS_VERSION, UDFFS_DATE, - UDF_SB_VOLIDENT(sb), ts.year, ts.month, ts.day, ts.hour, ts.minute, - ts.typeAndTimezone); + sbi->s_volume_ident, ts.year, ts.month, ts.day, + ts.hour, ts.minute, ts.typeAndTimezone); } if (!(sb->s_flags & MS_RDONLY)) udf_open_lvid(sb); @@ -1556,7 +1871,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* perhaps it's not extensible enough, but for now ... 
*/ inode = udf_iget(sb, rootdir); if (!inode) { - printk("UDF-fs: Error in udf_iget, block=%d, partition=%d\n", + printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " + "partition=%d\n", rootdir.logicalBlockNum, rootdir.partitionReferenceNum); goto error_out; } @@ -1564,7 +1880,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Allocate a dentry for the root inode */ sb->s_root = d_alloc_root(inode); if (!sb->s_root) { - printk("UDF-fs: Couldn't allocate root dentry\n"); + printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n"); iput(inode); goto error_out; } @@ -1572,30 +1888,32 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) return 0; error_out: - if (UDF_SB_VAT(sb)) - iput(UDF_SB_VAT(sb)); - if (UDF_SB_NUMPARTS(sb)) { - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb), s_uspace); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb), s_fspace); - if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) { + if (sbi->s_vat_inode) + iput(sbi->s_vat_inode); + if (sbi->s_partitions) { + struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition]; + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) + iput(map->s_uspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) + iput(map->s_fspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) + udf_sb_free_bitmap(map->s_uspace.s_bitmap); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) + udf_sb_free_bitmap(map->s_fspace.s_bitmap); + if 
(map->s_partition_type == UDF_SPARABLE_MAP15) for (i = 0; i < 4; i++) - brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); - } + brelse(map->s_type_specific.s_sparing. + s_spar_map[i]); } #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - unload_nls(UDF_SB(sb)->s_nls_map); + unload_nls(sbi->s_nls_map); #endif if (!(sb->s_flags & MS_RDONLY)) udf_close_lvid(sb); - brelse(UDF_SB_LVIDBH(sb)); - UDF_SB_FREE(sb); + brelse(sbi->s_lvid_bh); + + kfree(sbi->s_partmaps); kfree(sbi); sb->s_fs_info = NULL; @@ -1614,7 +1932,7 @@ void udf_error(struct super_block *sb, const char *function, va_start(args, fmt); vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); - printk (KERN_CRIT "UDF-fs error (device %s): %s: %s\n", + printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n", sb->s_id, function, error_buf); } @@ -1646,31 +1964,34 @@ void udf_warning(struct super_block *sb, const char *function, static void udf_put_super(struct super_block *sb) { int i; + struct udf_sb_info *sbi; - if (UDF_SB_VAT(sb)) - iput(UDF_SB_VAT(sb)); - if (UDF_SB_NUMPARTS(sb)) { - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb), s_uspace); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb), s_fspace); - if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) { + sbi = UDF_SB(sb); + if (sbi->s_vat_inode) + iput(sbi->s_vat_inode); + if (sbi->s_partitions) { + struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition]; + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) + iput(map->s_uspace.s_table); + if 
(map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) + iput(map->s_fspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) + udf_sb_free_bitmap(map->s_uspace.s_bitmap); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) + udf_sb_free_bitmap(map->s_fspace.s_bitmap); + if (map->s_partition_type == UDF_SPARABLE_MAP15) for (i = 0; i < 4; i++) - brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); - } + brelse(map->s_type_specific.s_sparing. + s_spar_map[i]); } #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - unload_nls(UDF_SB(sb)->s_nls_map); + unload_nls(sbi->s_nls_map); #endif if (!(sb->s_flags & MS_RDONLY)) udf_close_lvid(sb); - brelse(UDF_SB_LVIDBH(sb)); - UDF_SB_FREE(sb); + brelse(sbi->s_lvid_bh); + kfree(sbi->s_partmaps); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } @@ -1691,15 +2012,22 @@ static void udf_put_super(struct super_block *sb) static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; + struct udf_sb_info *sbi = UDF_SB(sb); + struct logicalVolIntegrityDescImpUse *lvidiu; + + if (sbi->s_lvid_bh != NULL) + lvidiu = udf_sb_lvidiu(sbi); + else + lvidiu = NULL; buf->f_type = UDF_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb)); + buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len; buf->f_bfree = udf_count_free(sb); buf->f_bavail = buf->f_bfree; - buf->f_files = (UDF_SB_LVIDBH(sb) ? - (le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + - le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs)) : 0) + buf->f_bfree; + buf->f_files = (lvidiu != NULL ? 
(le32_to_cpu(lvidiu->numFiles) + + le32_to_cpu(lvidiu->numDirs)) : 0) + + buf->f_bfree; buf->f_ffree = buf->f_bfree; /* __kernel_fsid_t f_fsid */ buf->f_namelen = UDF_NAME_LEN - 2; @@ -1711,7 +2039,8 @@ static unsigned char udf_bitmap_lookup[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; -static unsigned int udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) +static unsigned int udf_count_free_bitmap(struct super_block *sb, + struct udf_bitmap *bitmap) { struct buffer_head *bh = NULL; unsigned int accum = 0; @@ -1727,7 +2056,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, struct udf_bit lock_kernel(); loc.logicalBlockNum = bitmap->s_extPosition; - loc.partitionReferenceNum = UDF_SB_PARTITION(sb); + loc.partitionReferenceNum = UDF_SB(sb)->s_partition; bh = udf_read_ptagged(sb, loc, 0, &ident); if (!bh) { @@ -1772,7 +2101,8 @@ out: return accum; } -static unsigned int udf_count_free_table(struct super_block *sb, struct inode *table) +static unsigned int udf_count_free_table(struct super_block *sb, + struct inode *table) { unsigned int accum = 0; uint32_t elen; @@ -1782,13 +2112,13 @@ static unsigned int udf_count_free_table(struct super_block *sb, struct inode *t lock_kernel(); - epos.block = UDF_I_LOCATION(table); + epos.block = UDF_I(table)->i_location; epos.offset = sizeof(struct unallocSpaceEntry); epos.bh = NULL; - while ((etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while ((etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) accum += (elen >> table->i_sb->s_blocksize_bits); - } + brelse(epos.bh); unlock_kernel(); @@ -1799,10 +2129,17 @@ static unsigned int udf_count_free_table(struct super_block *sb, struct inode *t static unsigned int udf_count_free(struct super_block *sb) { unsigned int accum = 0; - - if (UDF_SB_LVIDBH(sb)) { - if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb)) { - accum = 
le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]); + struct udf_sb_info *sbi; + struct udf_part_map *map; + + sbi = UDF_SB(sb); + if (sbi->s_lvid_bh) { + struct logicalVolIntegrityDesc *lvid = + (struct logicalVolIntegrityDesc *) + sbi->s_lvid_bh->b_data; + if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) { + accum = le32_to_cpu( + lvid->freeSpaceTable[sbi->s_partition]); if (accum == 0xFFFFFFFF) accum = 0; } @@ -1811,24 +2148,25 @@ static unsigned int udf_count_free(struct super_block *sb) if (accum) return accum; - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) { + map = &sbi->s_partmaps[sbi->s_partition]; + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { accum += udf_count_free_bitmap(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_bitmap); + map->s_uspace.s_bitmap); } - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) { + if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { accum += udf_count_free_bitmap(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_bitmap); + map->s_fspace.s_bitmap); } if (accum) return accum; - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) { + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { accum += udf_count_free_table(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table); + map->s_uspace.s_table); } - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) { + if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { accum += udf_count_free_table(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table); + map->s_fspace.s_table); } return accum; diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index e6f933dd6a7b..6ec99221e50c 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -33,7 +33,8 @@ #include <linux/buffer_head.h> #include "udf_i.h" -static void udf_pc_to_char(struct super_block *sb, char *from, int fromlen, char *to) 
+static void udf_pc_to_char(struct super_block *sb, char *from, int fromlen, + char *to) { struct pathComponent *pc; int elen = 0; @@ -78,10 +79,12 @@ static int udf_symlink_filler(struct file *file, struct page *page) char *symlink; int err = -EIO; char *p = kmap(page); + struct udf_inode_info *iinfo; lock_kernel(); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { - symlink = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); + iinfo = UDF_I(inode); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr; } else { bh = sb_bread(inode->i_sb, udf_block_map(inode, 0)); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 7fc3912885a5..fe61be17cdab 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -74,17 +74,18 @@ void udf_truncate_tail_extent(struct inode *inode) uint64_t lbcount = 0; int8_t etype = -1, netype; int adsize; + struct udf_inode_info *iinfo = UDF_I(inode); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || - inode->i_size == UDF_I_LENEXTENTS(inode)) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || + inode->i_size == iinfo->i_lenExtents) return; /* Are we going to delete the file anyway? 
*/ if (inode->i_nlink == 0) return; - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else BUG(); @@ -117,7 +118,7 @@ void udf_truncate_tail_extent(struct inode *inode) } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ - UDF_I_LENEXTENTS(inode) = inode->i_size; + iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); } @@ -129,19 +130,20 @@ void udf_discard_prealloc(struct inode *inode) uint64_t lbcount = 0; int8_t etype = -1, netype; int adsize; + struct udf_inode_info *iinfo = UDF_I(inode); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || - inode->i_size == UDF_I_LENEXTENTS(inode)) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || + inode->i_size == iinfo->i_lenExtents) return; - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else adsize = 0; - epos.block = UDF_I_LOCATION(inode); + epos.block = iinfo->i_location; /* Find the last extent in the file */ while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { @@ -153,8 +155,9 @@ void udf_discard_prealloc(struct inode *inode) lbcount -= elen; extent_trunc(inode, &epos, eloc, etype, elen, 0); if (!epos.bh) { - UDF_I_LENALLOC(inode) = - epos.offset - udf_file_entry_alloc_offset(inode); + iinfo->i_lenAlloc = + epos.offset - + udf_file_entry_alloc_offset(inode); mark_inode_dirty(inode); } else { struct allocExtDesc *aed = @@ -163,7 +166,7 @@ void udf_discard_prealloc(struct inode *inode) cpu_to_le32(epos.offset - sizeof(struct allocExtDesc)); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || - 
UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) udf_update_tag(epos.bh->b_data, epos.offset); else udf_update_tag(epos.bh->b_data, @@ -173,7 +176,7 @@ void udf_discard_prealloc(struct inode *inode) } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ - UDF_I_LENEXTENTS(inode) = lbcount; + iinfo->i_lenExtents = lbcount; brelse(epos.bh); } @@ -184,13 +187,15 @@ void udf_truncate_extents(struct inode *inode) uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; int8_t etype; struct super_block *sb = inode->i_sb; + struct udf_sb_info *sbi = UDF_SB(sb); sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset; loff_t byte_offset; int adsize; + struct udf_inode_info *iinfo = UDF_I(inode); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); - else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else BUG(); @@ -212,7 +217,8 @@ void udf_truncate_extents(struct inode *inode) else lenalloc -= sizeof(struct allocExtDesc); - while ((etype = udf_current_aext(inode, &epos, &eloc, &elen, 0)) != -1) { + while ((etype = udf_current_aext(inode, &epos, &eloc, + &elen, 0)) != -1) { if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { udf_write_aext(inode, &epos, neloc, nelen, 0); if (indirect_ext_len) { @@ -224,35 +230,43 @@ void udf_truncate_extents(struct inode *inode) 0, indirect_ext_len); } else { if (!epos.bh) { - UDF_I_LENALLOC(inode) = lenalloc; + iinfo->i_lenAlloc = + lenalloc; mark_inode_dirty(inode); } else { struct allocExtDesc *aed = - (struct allocExtDesc *)(epos.bh->b_data); + (struct allocExtDesc *) + (epos.bh->b_data); + int len = + sizeof(struct allocExtDesc); + aed->lengthAllocDescs = cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || - UDF_SB_UDFREV(sb) >= 0x0201) - udf_update_tag(epos.bh->b_data, - 
lenalloc + - sizeof(struct allocExtDesc)); - else - udf_update_tag(epos.bh->b_data, - sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(epos.bh, inode); + if (!UDF_QUERY_FLAG(sb, + UDF_FLAG_STRICT) || + sbi->s_udfrev >= 0x0201) + len += lenalloc; + + udf_update_tag(epos.bh->b_data, + len); + mark_buffer_dirty_inode( + epos.bh, inode); } } brelse(epos.bh); epos.offset = sizeof(struct allocExtDesc); epos.block = eloc; - epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, eloc, 0)); + epos.bh = udf_tread(sb, + udf_get_lb_pblock(sb, eloc, 0)); if (elen) - indirect_ext_len = (elen + sb->s_blocksize -1) >> + indirect_ext_len = + (elen + sb->s_blocksize - 1) >> sb->s_blocksize_bits; else indirect_ext_len = 1; } else { - extent_trunc(inode, &epos, eloc, etype, elen, 0); + extent_trunc(inode, &epos, eloc, etype, + elen, 0); epos.offset += adsize; } } @@ -264,19 +278,20 @@ void udf_truncate_extents(struct inode *inode) indirect_ext_len); } else { if (!epos.bh) { - UDF_I_LENALLOC(inode) = lenalloc; + iinfo->i_lenAlloc = lenalloc; mark_inode_dirty(inode); } else { struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); aed->lengthAllocDescs = cpu_to_le32(lenalloc); if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || - UDF_SB_UDFREV(sb) >= 0x0201) + sbi->s_udfrev >= 0x0201) udf_update_tag(epos.bh->b_data, - lenalloc + sizeof(struct allocExtDesc)); + lenalloc + + sizeof(struct allocExtDesc)); else udf_update_tag(epos.bh->b_data, - sizeof(struct allocExtDesc)); + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(epos.bh, inode); } } @@ -290,13 +305,16 @@ void udf_truncate_extents(struct inode *inode) * extending the file by 'offset' blocks. */ if ((!epos.bh && - epos.offset == udf_file_entry_alloc_offset(inode)) || - (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { + epos.offset == + udf_file_entry_alloc_offset(inode)) || + (epos.bh && epos.offset == + sizeof(struct allocExtDesc))) { /* File has no extents at all or has empty last * indirect extent! 
Create a fake extent... */ extent.extLocation.logicalBlockNum = 0; extent.extLocation.partitionReferenceNum = 0; - extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; + extent.extLength = + EXT_NOT_RECORDED_NOT_ALLOCATED; } else { epos.offset -= adsize; etype = udf_next_aext(inode, &epos, @@ -305,10 +323,12 @@ void udf_truncate_extents(struct inode *inode) extent.extLength |= etype << 30; } udf_extend_file(inode, &epos, &extent, - offset + ((inode->i_size & (sb->s_blocksize - 1)) != 0)); + offset + + ((inode->i_size & + (sb->s_blocksize - 1)) != 0)); } } - UDF_I_LENEXTENTS(inode) = inode->i_size; + iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); } diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index d7dbe6f3ba0c..ccc52f16bf7d 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -7,20 +7,4 @@ static inline struct udf_inode_info *UDF_I(struct inode *inode) return list_entry(inode, struct udf_inode_info, vfs_inode); } -#define UDF_I_LOCATION(X) ( UDF_I(X)->i_location ) -#define UDF_I_LENEATTR(X) ( UDF_I(X)->i_lenEAttr ) -#define UDF_I_LENALLOC(X) ( UDF_I(X)->i_lenAlloc ) -#define UDF_I_LENEXTENTS(X) ( UDF_I(X)->i_lenExtents ) -#define UDF_I_UNIQUE(X) ( UDF_I(X)->i_unique ) -#define UDF_I_ALLOCTYPE(X) ( UDF_I(X)->i_alloc_type ) -#define UDF_I_EFE(X) ( UDF_I(X)->i_efe ) -#define UDF_I_USE(X) ( UDF_I(X)->i_use ) -#define UDF_I_STRAT4096(X) ( UDF_I(X)->i_strat4096 ) -#define UDF_I_NEXT_ALLOC_BLOCK(X) ( UDF_I(X)->i_next_alloc_block ) -#define UDF_I_NEXT_ALLOC_GOAL(X) ( UDF_I(X)->i_next_alloc_goal ) -#define UDF_I_CRTIME(X) ( UDF_I(X)->i_crtime ) -#define UDF_I_SAD(X) ( UDF_I(X)->i_ext.i_sad ) -#define UDF_I_LAD(X) ( UDF_I(X)->i_ext.i_lad ) -#define UDF_I_DATA(X) ( UDF_I(X)->i_ext.i_data ) - #endif /* !defined(_LINUX_UDF_I_H) */ diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 3c2982017c6d..737d1c604eea 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -26,6 +26,8 @@ #define UDF_FLAG_GID_IGNORE 14 #define UDF_FLAG_UID_SET 15 #define UDF_FLAG_GID_SET 16 +#define 
UDF_FLAG_SESSION_SET 17 +#define UDF_FLAG_LASTBLOCK_SET 18 #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 @@ -41,96 +43,12 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb) return sb->s_fs_info; } -#define UDF_SB_FREE(X)\ -{\ - if (UDF_SB(X)) {\ - kfree(UDF_SB_PARTMAPS(X));\ - UDF_SB_PARTMAPS(X) = NULL;\ - }\ -} - -#define UDF_SB_ALLOC_PARTMAPS(X,Y)\ -{\ - UDF_SB_PARTMAPS(X) = kmalloc(sizeof(struct udf_part_map) * Y, GFP_KERNEL);\ - if (UDF_SB_PARTMAPS(X) != NULL) {\ - UDF_SB_NUMPARTS(X) = Y;\ - memset(UDF_SB_PARTMAPS(X), 0x00, sizeof(struct udf_part_map) * Y);\ - } else {\ - UDF_SB_NUMPARTS(X) = 0;\ - udf_error(X, __FUNCTION__, "Unable to allocate space for %d partition maps", Y);\ - }\ -} - -#define UDF_SB_ALLOC_BITMAP(X,Y,Z)\ -{\ - int nr_groups = ((UDF_SB_PARTLEN((X),(Y)) + (sizeof(struct spaceBitmapDesc) << 3) +\ - ((X)->s_blocksize * 8) - 1) / ((X)->s_blocksize * 8));\ - int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) * nr_groups);\ - if (size <= PAGE_SIZE)\ - UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap = kmalloc(size, GFP_KERNEL);\ - else\ - UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap = vmalloc(size);\ - if (UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap != NULL) {\ - memset(UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap, 0x00, size);\ - UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_block_bitmap =\ - (struct buffer_head **)(UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap + 1);\ - UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_nr_groups = nr_groups;\ - } else {\ - udf_error(X, __FUNCTION__, "Unable to allocate space for bitmap and %d buffer_head pointers", nr_groups);\ - }\ -} +struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi); -#define UDF_SB_FREE_BITMAP(X,Y,Z)\ -{\ - int i;\ - int nr_groups = UDF_SB_BITMAP_NR_GROUPS(X,Y,Z);\ - int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) * nr_groups);\ - for (i = 0; i < nr_groups; i++) {\ - if (UDF_SB_BITMAP(X,Y,Z,i))\ - brelse(UDF_SB_BITMAP(X,Y,Z,i));\ - }\ - if 
(size <= PAGE_SIZE)\ - kfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\ - else\ - vfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\ -} +int udf_compute_nr_groups(struct super_block *sb, u32 partition); #define UDF_QUERY_FLAG(X,Y) ( UDF_SB(X)->s_flags & ( 1 << (Y) ) ) #define UDF_SET_FLAG(X,Y) ( UDF_SB(X)->s_flags |= ( 1 << (Y) ) ) #define UDF_CLEAR_FLAG(X,Y) ( UDF_SB(X)->s_flags &= ~( 1 << (Y) ) ) -#define UDF_UPDATE_UDFREV(X,Y) ( ((Y) > UDF_SB_UDFREV(X)) ? UDF_SB_UDFREV(X) = (Y) : UDF_SB_UDFREV(X) ) - -#define UDF_SB_PARTMAPS(X) ( UDF_SB(X)->s_partmaps ) -#define UDF_SB_PARTTYPE(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_type ) -#define UDF_SB_PARTROOT(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_root ) -#define UDF_SB_PARTLEN(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_len ) -#define UDF_SB_PARTVSN(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_volumeseqnum ) -#define UDF_SB_PARTNUM(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_num ) -#define UDF_SB_TYPESPAR(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_sparing ) -#define UDF_SB_TYPEVIRT(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_virtual ) -#define UDF_SB_PARTFUNC(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_func ) -#define UDF_SB_PARTFLAGS(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_flags ) -#define UDF_SB_BITMAP(X,Y,Z,I) ( UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_block_bitmap[I] ) -#define UDF_SB_BITMAP_NR_GROUPS(X,Y,Z) ( UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_nr_groups ) - -#define UDF_SB_VOLIDENT(X) ( UDF_SB(X)->s_volident ) -#define UDF_SB_NUMPARTS(X) ( UDF_SB(X)->s_partitions ) -#define UDF_SB_PARTITION(X) ( UDF_SB(X)->s_partition ) -#define UDF_SB_SESSION(X) ( UDF_SB(X)->s_session ) -#define UDF_SB_ANCHOR(X) ( UDF_SB(X)->s_anchor ) -#define UDF_SB_LASTBLOCK(X) ( UDF_SB(X)->s_lastblock ) -#define UDF_SB_LVIDBH(X) ( UDF_SB(X)->s_lvidbh ) -#define UDF_SB_LVID(X) ( (struct logicalVolIntegrityDesc *)UDF_SB_LVIDBH(X)->b_data ) -#define UDF_SB_LVIDIU(X) ( (struct logicalVolIntegrityDescImpUse 
*)&(UDF_SB_LVID(X)->impUse[le32_to_cpu(UDF_SB_LVID(X)->numOfPartitions) * 2 * sizeof(uint32_t)/sizeof(uint8_t)]) ) - -#define UDF_SB_UMASK(X) ( UDF_SB(X)->s_umask ) -#define UDF_SB_GID(X) ( UDF_SB(X)->s_gid ) -#define UDF_SB_UID(X) ( UDF_SB(X)->s_uid ) -#define UDF_SB_RECORDTIME(X) ( UDF_SB(X)->s_recordtime ) -#define UDF_SB_SERIALNUM(X) ( UDF_SB(X)->s_serialnum ) -#define UDF_SB_UDFREV(X) ( UDF_SB(X)->s_udfrev ) -#define UDF_SB_FLAGS(X) ( UDF_SB(X)->s_flags ) -#define UDF_SB_VAT(X) ( UDF_SB(X)->s_vat ) - #endif /* __LINUX_UDF_SB_H */ diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index c8016cc9e7e6..681dc2b66cdb 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -24,18 +24,21 @@ #define UDF_PATH_LEN 1023 #define udf_file_entry_alloc_offset(inode)\ - (UDF_I_USE(inode) ?\ + (UDF_I(inode)->i_use ?\ sizeof(struct unallocSpaceEntry) :\ - ((UDF_I_EFE(inode) ?\ + ((UDF_I(inode)->i_efe ?\ sizeof(struct extendedFileEntry) :\ - sizeof(struct fileEntry)) + UDF_I_LENEATTR(inode))) + sizeof(struct fileEntry)) + UDF_I(inode)->i_lenEAttr)) #define udf_ext0_offset(inode)\ - (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ?\ + (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ?\ udf_file_entry_alloc_offset(inode) : 0) #define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset)) +/* computes tag checksum */ +u8 udf_tag_checksum(const tag *t); + struct dentry; struct inode; struct task_struct; @@ -185,8 +188,8 @@ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, sector_t *); extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset); -extern long_ad *udf_get_filelongad(uint8_t *, int, int *, int); -extern short_ad *udf_get_fileshortad(uint8_t *, int, int *, int); +extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); +extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); /* crc.c */ extern uint16_t udf_crc(uint8_t *, uint32_t, 
uint16_t); diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index adcb87c2da7e..ce595732ba6f 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -18,8 +18,10 @@ Boston, MA 02111-1307, USA. */ /* - * dgb 10/02/98: ripped this from glibc source to help convert timestamps to unix time - * 10/04/98: added new table-based lookup after seeing how ugly the gnu code is + * dgb 10/02/98: ripped this from glibc source to help convert timestamps + * to unix time + * 10/04/98: added new table-based lookup after seeing how ugly + * the gnu code is * blf 09/27/99: ripped out all the old code and inserted new table from * John Brockmeyer (without leap second corrections) * rewrote udf_stamp_to_time and fixed timezone accounting in @@ -55,27 +57,27 @@ static const unsigned short int __mon_yday[2][13] = { #define MAX_YEAR_SECONDS 69 #define SPD 0x15180 /*3600*24 */ -#define SPY(y,l,s) (SPD * (365*y+l)+s) - -static time_t year_seconds[MAX_YEAR_SECONDS]= { -/*1970*/ SPY( 0, 0,0), SPY( 1, 0,0), SPY( 2, 0,0), SPY( 3, 1,0), -/*1974*/ SPY( 4, 1,0), SPY( 5, 1,0), SPY( 6, 1,0), SPY( 7, 2,0), -/*1978*/ SPY( 8, 2,0), SPY( 9, 2,0), SPY(10, 2,0), SPY(11, 3,0), -/*1982*/ SPY(12, 3,0), SPY(13, 3,0), SPY(14, 3,0), SPY(15, 4,0), -/*1986*/ SPY(16, 4,0), SPY(17, 4,0), SPY(18, 4,0), SPY(19, 5,0), -/*1990*/ SPY(20, 5,0), SPY(21, 5,0), SPY(22, 5,0), SPY(23, 6,0), -/*1994*/ SPY(24, 6,0), SPY(25, 6,0), SPY(26, 6,0), SPY(27, 7,0), -/*1998*/ SPY(28, 7,0), SPY(29, 7,0), SPY(30, 7,0), SPY(31, 8,0), -/*2002*/ SPY(32, 8,0), SPY(33, 8,0), SPY(34, 8,0), SPY(35, 9,0), -/*2006*/ SPY(36, 9,0), SPY(37, 9,0), SPY(38, 9,0), SPY(39,10,0), -/*2010*/ SPY(40,10,0), SPY(41,10,0), SPY(42,10,0), SPY(43,11,0), -/*2014*/ SPY(44,11,0), SPY(45,11,0), SPY(46,11,0), SPY(47,12,0), -/*2018*/ SPY(48,12,0), SPY(49,12,0), SPY(50,12,0), SPY(51,13,0), -/*2022*/ SPY(52,13,0), SPY(53,13,0), SPY(54,13,0), SPY(55,14,0), -/*2026*/ SPY(56,14,0), SPY(57,14,0), SPY(58,14,0), SPY(59,15,0), -/*2030*/ SPY(60,15,0), SPY(61,15,0), SPY(62,15,0), 
SPY(63,16,0), -/*2034*/ SPY(64,16,0), SPY(65,16,0), SPY(66,16,0), SPY(67,17,0), -/*2038*/ SPY(68,17,0) +#define SPY(y, l, s) (SPD * (365 * y + l) + s) + +static time_t year_seconds[MAX_YEAR_SECONDS] = { +/*1970*/ SPY(0, 0, 0), SPY(1, 0, 0), SPY(2, 0, 0), SPY(3, 1, 0), +/*1974*/ SPY(4, 1, 0), SPY(5, 1, 0), SPY(6, 1, 0), SPY(7, 2, 0), +/*1978*/ SPY(8, 2, 0), SPY(9, 2, 0), SPY(10, 2, 0), SPY(11, 3, 0), +/*1982*/ SPY(12, 3, 0), SPY(13, 3, 0), SPY(14, 3, 0), SPY(15, 4, 0), +/*1986*/ SPY(16, 4, 0), SPY(17, 4, 0), SPY(18, 4, 0), SPY(19, 5, 0), +/*1990*/ SPY(20, 5, 0), SPY(21, 5, 0), SPY(22, 5, 0), SPY(23, 6, 0), +/*1994*/ SPY(24, 6, 0), SPY(25, 6, 0), SPY(26, 6, 0), SPY(27, 7, 0), +/*1998*/ SPY(28, 7, 0), SPY(29, 7, 0), SPY(30, 7, 0), SPY(31, 8, 0), +/*2002*/ SPY(32, 8, 0), SPY(33, 8, 0), SPY(34, 8, 0), SPY(35, 9, 0), +/*2006*/ SPY(36, 9, 0), SPY(37, 9, 0), SPY(38, 9, 0), SPY(39, 10, 0), +/*2010*/ SPY(40, 10, 0), SPY(41, 10, 0), SPY(42, 10, 0), SPY(43, 11, 0), +/*2014*/ SPY(44, 11, 0), SPY(45, 11, 0), SPY(46, 11, 0), SPY(47, 12, 0), +/*2018*/ SPY(48, 12, 0), SPY(49, 12, 0), SPY(50, 12, 0), SPY(51, 13, 0), +/*2022*/ SPY(52, 13, 0), SPY(53, 13, 0), SPY(54, 13, 0), SPY(55, 14, 0), +/*2026*/ SPY(56, 14, 0), SPY(57, 14, 0), SPY(58, 14, 0), SPY(59, 15, 0), +/*2030*/ SPY(60, 15, 0), SPY(61, 15, 0), SPY(62, 15, 0), SPY(63, 16, 0), +/*2034*/ SPY(64, 16, 0), SPY(65, 16, 0), SPY(66, 16, 0), SPY(67, 17, 0), +/*2038*/ SPY(68, 17, 0) }; extern struct timezone sys_tz; @@ -115,7 +117,7 @@ time_t *udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src) return dest; } -kernel_timestamp *udf_time_to_stamp(kernel_timestamp * dest, struct timespec ts) +kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) { long int days, rem, y; const unsigned short int *ip; @@ -137,7 +139,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp * dest, struct timespec ts) dest->second = rem % 60; y = 1970; -#define DIV(a,b) ((a) / (b) - ((a) % (b) < 0)) +#define 
DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) #define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) while (days < 0 || days >= (__isleap(y) ? 366 : 365)) { @@ -145,8 +147,8 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp * dest, struct timespec ts) /* Adjust DAYS and Y to match the guessed year. */ days -= ((yg - y) * 365 - + LEAPS_THRU_END_OF (yg - 1) - - LEAPS_THRU_END_OF (y - 1)); + + LEAPS_THRU_END_OF(yg - 1) + - LEAPS_THRU_END_OF(y - 1)); y = yg; } dest->year = y; @@ -158,7 +160,8 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp * dest, struct timespec ts) dest->day = days + 1; dest->centiseconds = ts.tv_nsec / 10000000; - dest->hundredsOfMicroseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000) / 100; + dest->hundredsOfMicroseconds = (ts.tv_nsec / 1000 - + dest->centiseconds * 10000) / 100; dest->microseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000 - dest->hundredsOfMicroseconds * 100); return dest; diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 9e6099c26c27..e533b11703bf 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -136,12 +136,18 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) if (c < 0x80U) { utf_o->u_name[utf_o->u_len++] = (uint8_t)c; } else if (c < 0x800U) { - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t)(0xc0 | (c >> 6)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t)(0x80 | (c & 0x3f)); } else { - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f)); - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t)(0xe0 | (c >> 12)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t)(0x80 | + ((c >> 6) & 0x3f)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t)(0x80 | (c & 0x3f)); } } utf_o->u_cmpID = 8; @@ -232,9 +238,8 @@ try_again: goto 
error_out; } - if (max_val == 0xffffU) { + if (max_val == 0xffffU) ocu[++u_len] = (uint8_t)(utf_char >> 8); - } ocu[++u_len] = (uint8_t)(utf_char & 0xffU); } @@ -330,29 +335,29 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, struct ustr filename, unifilename; int len; - if (udf_build_ustr_exact(&unifilename, sname, flen)) { + if (udf_build_ustr_exact(&unifilename, sname, flen)) return 0; - } if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { if (!udf_CS0toUTF8(&filename, &unifilename)) { - udf_debug("Failed in udf_get_filename: sname = %s\n", sname); + udf_debug("Failed in udf_get_filename: sname = %s\n", + sname); return 0; } } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { - if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename)) { - udf_debug("Failed in udf_get_filename: sname = %s\n", sname); + if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, + &unifilename)) { + udf_debug("Failed in udf_get_filename: sname = %s\n", + sname); return 0; } - } else { + } else return 0; - } len = udf_translate_to_linux(dname, filename.u_name, filename.u_len, unifilename.u_name, unifilename.u_len); - if (len) { + if (len) return len; - } return 0; } @@ -363,23 +368,20 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, struct ustr unifilename; int namelen; - if (!(udf_char_to_ustr(&unifilename, sname, flen))) { + if (!udf_char_to_ustr(&unifilename, sname, flen)) return 0; - } if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN); - if (!namelen) { + if (!namelen) return 0; - } } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { - namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &unifilename, UDF_NAME_LEN); - if (!namelen) { + namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, + &unifilename, UDF_NAME_LEN); + if (!namelen) return 0; - } - } else { + } else return 0; - } return namelen; } @@ -389,8 +391,9 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, 
#define CRC_MARK '#' #define EXT_SIZE 5 -static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, - uint8_t *fidName, int fidNameLen) +static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, + int udfLen, uint8_t *fidName, + int fidNameLen) { int index, newIndex = 0, needsCRC = 0; int extIndex = 0, newExtIndex = 0, hasExt = 0; @@ -409,13 +412,16 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen if (curr == '/' || curr == 0) { needsCRC = 1; curr = ILLEGAL_CHAR_MARK; - while (index + 1 < udfLen && (udfName[index + 1] == '/' || - udfName[index + 1] == 0)) + while (index + 1 < udfLen && + (udfName[index + 1] == '/' || + udfName[index + 1] == 0)) index++; - } if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE) { - if (udfLen == index + 1) { + } + if (curr == EXT_MARK && + (udfLen - index - 1) <= EXT_SIZE) { + if (udfLen == index + 1) hasExt = 0; - } else { + else { hasExt = 1; extIndex = index; newExtIndex = newIndex; @@ -433,16 +439,18 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen if (hasExt) { int maxFilenameLen; - for(index = 0; index < EXT_SIZE && extIndex + index + 1 < udfLen; index++) { + for (index = 0; + index < EXT_SIZE && extIndex + index + 1 < udfLen; + index++) { curr = udfName[extIndex + index + 1]; if (curr == '/' || curr == 0) { needsCRC = 1; curr = ILLEGAL_CHAR_MARK; - while(extIndex + index + 2 < udfLen && - (index + 1 < EXT_SIZE - && (udfName[extIndex + index + 2] == '/' || - udfName[extIndex + index + 2] == 0))) + while (extIndex + index + 2 < udfLen && + (index + 1 < EXT_SIZE && + (udfName[extIndex + index + 2] == '/' || + udfName[extIndex + index + 2] == 0))) index++; } ext[localExtIndex++] = curr; @@ -452,9 +460,8 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen newIndex = maxFilenameLen; else newIndex = newExtIndex; - } else if (newIndex > 250) { + } else if (newIndex > 250) newIndex = 250; - } 
newName[newIndex++] = CRC_MARK; valueCRC = udf_crc(fidName, fidNameLen, 0); newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index f63a09ce8683..1fca381f0ce2 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -9,7 +9,6 @@ */ #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/stat.h> #include <linux/time.h> #include <linux/string.h> @@ -19,6 +18,7 @@ #include <linux/bitops.h> #include <asm/byteorder.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" diff --git a/fs/ufs/cylinder.c b/fs/ufs/cylinder.c index 2a815665644f..b4676322ddb6 100644 --- a/fs/ufs/cylinder.c +++ b/fs/ufs/cylinder.c @@ -9,7 +9,6 @@ */ #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> @@ -17,6 +16,7 @@ #include <asm/byteorder.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index aaf2878305ce..ef563fc8d72c 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -18,9 +18,9 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/swap.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" diff --git a/fs/ufs/file.c b/fs/ufs/file.c index a46c97bf023f..625ef17c6f83 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -24,9 +24,9 @@ */ #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/buffer_head.h> /* for sync_mapping_buffers() */ +#include "ufs_fs.h" #include "ufs.h" diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7e260bc0d94f..ac181f6806a3 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -24,7 +24,6 @@ */ #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> @@ -34,6 +33,7 @@ #include <linux/bitops.h> #include <asm/byteorder.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" diff --git 
a/fs/ufs/inode.c b/fs/ufs/inode.c index 4320782761ae..5446b888fc8e 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -30,7 +30,6 @@ #include <linux/errno.h> #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> @@ -38,6 +37,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" @@ -714,26 +714,30 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) return 0; } -void ufs_read_inode(struct inode * inode) +struct inode *ufs_iget(struct super_block *sb, unsigned long ino) { - struct ufs_inode_info *ufsi = UFS_I(inode); - struct super_block * sb; - struct ufs_sb_private_info * uspi; + struct ufs_inode_info *ufsi; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * bh; + struct inode *inode; int err; - UFSD("ENTER, ino %lu\n", inode->i_ino); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; + UFSD("ENTER, ino %lu\n", ino); - if (inode->i_ino < UFS_ROOTINO || - inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) { + if (ino < UFS_ROOTINO || ino > (uspi->s_ncg * uspi->s_ipg)) { ufs_warning(sb, "ufs_read_inode", "bad inode number (%lu)\n", - inode->i_ino); - goto bad_inode; + ino); + return ERR_PTR(-EIO); } + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ufsi = UFS_I(inode); + bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino)); if (!bh) { ufs_warning(sb, "ufs_read_inode", "unable to read inode %lu\n", @@ -765,10 +769,12 @@ void ufs_read_inode(struct inode * inode) brelse(bh); UFSD("EXIT\n"); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); + iget_failed(inode); + return ERR_PTR(-EIO); } static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index d8bfbee2fe2b..e3a9b1fac75a 
100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -29,8 +29,9 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/smp_lock.h> + +#include "ufs_fs.h" #include "ufs.h" #include "util.h" @@ -57,10 +58,10 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru lock_kernel(); ino = ufs_inode_by_name(dir, dentry); if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) { + inode = ufs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 0072cb33ebec..85b22b5977fa 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -76,7 +76,6 @@ #include <linux/errno.h> #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/stat.h> @@ -91,6 +90,7 @@ #include <linux/mount.h> #include <linux/seq_file.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" @@ -131,6 +131,8 @@ static void ufs_print_super_stuff(struct super_block *sb, printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n", (unsigned long long) fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree)); + printk(KERN_INFO" fs_maxsymlinklen: %u\n", + fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen)); } else { printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); @@ -633,6 +635,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) unsigned block_size, super_block_size; unsigned flags; unsigned super_block_offset; + int ret = -EINVAL; uspi = NULL; ubh = NULL; @@ -1060,17 +1063,21 @@ magic_found: uspi->s_bpf = uspi->s_fsize << 3; uspi->s_bpfshift = uspi->s_fshift + 3; uspi->s_bpfmask = uspi->s_bpf - 1; - if ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == - UFS_MOUNT_UFSTYPE_44BSD) + if ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_44BSD || + (sbi->s_mount_opt & 
UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_UFS2) uspi->s_maxsymlinklen = fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen); - inode = iget(sb, UFS_ROOTINO); - if (!inode || is_bad_inode(inode)) + inode = ufs_iget(sb, UFS_ROOTINO); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto failed; + } sb->s_root = d_alloc_root(inode); - if (!sb->s_root) + if (!sb->s_root) { + ret = -ENOMEM; goto dalloc_failed; + } ufs_setup_cstotal(sb); /* @@ -1092,7 +1099,7 @@ failed: kfree(sbi); sb->s_fs_info = NULL; UFSD("EXIT (FAILED)\n"); - return -EINVAL; + return ret; failed_nomem: UFSD("EXIT (NOMEM)\n"); @@ -1326,7 +1333,6 @@ static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, static const struct super_operations ufs_super_ops = { .alloc_inode = ufs_alloc_inode, .destroy_inode = ufs_destroy_inode, - .read_inode = ufs_read_inode, .write_inode = ufs_write_inode, .delete_inode = ufs_delete_inode, .put_super = ufs_put_super, diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index 43ac10e75a4a..c0156eda44bc 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c @@ -27,7 +27,8 @@ #include <linux/fs.h> #include <linux/namei.h> -#include <linux/ufs_fs.h> + +#include "ufs_fs.h" #include "ufs.h" diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 311ded34c2b2..41dd431ce228 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -36,7 +36,6 @@ #include <linux/errno.h> #include <linux/fs.h> -#include <linux/ufs_fs.h> #include <linux/fcntl.h> #include <linux/time.h> #include <linux/stat.h> @@ -46,6 +45,7 @@ #include <linux/blkdev.h> #include <linux/sched.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 7faa4cd71a27..fcb9231bb9ed 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern void ufs_free_inode (struct inode *inode); extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ -extern void ufs_read_inode (struct inode *); +extern struct inode 
*ufs_iget(struct super_block *, unsigned long); extern void ufs_put_inode (struct inode *); extern int ufs_write_inode (struct inode *, int); extern int ufs_sync_inode (struct inode *); diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h new file mode 100644 index 000000000000..54bde1895a80 --- /dev/null +++ b/fs/ufs/ufs_fs.h @@ -0,0 +1,947 @@ +/* + * linux/include/linux/ufs_fs.h + * + * Copyright (C) 1996 + * Adrian Rodriguez (adrian@franklins-tower.rutgers.edu) + * Laboratory for Computer Science Research Computing Facility + * Rutgers, The State University of New Jersey + * + * Clean swab support by Fare <fare@tunes.org> + * just hope no one is using NNUUXXI on __?64 structure elements + * 64-bit clean thanks to Maciej W. Rozycki <macro@ds2.pg.gda.pl> + * + * 4.4BSD (FreeBSD) support added on February 1st 1998 by + * Niels Kristian Bech Jensen <nkbj@image.dk> partially based + * on code by Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>. + * + * NeXTstep support added on February 5th 1998 by + * Niels Kristian Bech Jensen <nkbj@image.dk>. + * + * Write support by Daniel Pirkl <daniel.pirkl@email.cz> + * + * HP/UX hfs filesystem support added by + * Martin K. Petersen <mkp@mkp.net>, August 1999 + * + * UFS2 (of FreeBSD 5.x) support added by + * Niraj Kumar <niraj17@iitbombay.org> , Jan 2004 + * + */ + +#ifndef __LINUX_UFS_FS_H +#define __LINUX_UFS_FS_H + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/stat.h> +#include <linux/fs.h> + +#include <asm/div64.h> +typedef __u64 __bitwise __fs64; +typedef __u32 __bitwise __fs32; +typedef __u16 __bitwise __fs16; + +#define UFS_BBLOCK 0 +#define UFS_BBSIZE 8192 +#define UFS_SBLOCK 8192 +#define UFS_SBSIZE 8192 + +#define UFS_SECTOR_SIZE 512 +#define UFS_SECTOR_BITS 9 +#define UFS_MAGIC 0x00011954 +#define UFS2_MAGIC 0x19540119 +#define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ + +/* Copied from FreeBSD */ +/* + * Each disk drive contains some number of filesystems. 
+ * A filesystem consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A filesystem is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * For filesystem fs, the offsets of the various blocks of interest + * are given in the super block as: + * [fs->fs_sblkno] Super-block + * [fs->fs_cblkno] Cylinder group block + * [fs->fs_iblkno] Inode blocks + * [fs->fs_dblkno] Data blocks + * The beginning of cylinder group cg in fs, is given by + * the ``cgbase(fs, cg)'' macro. + * + * Depending on the architecture and the media, the superblock may + * reside in any one of four places. For tiny media where every block + * counts, it is placed at the very front of the partition. Historically, + * UFS1 placed it 8K from the front to leave room for the disk label and + * a small bootstrap. For UFS2 it got moved to 64K from the front to leave + * room for the disk label and a bigger bootstrap, and for really piggy + * systems we check at 256K from the front if the first three fail. In + * all cases the size of the superblock will be SBLOCKSIZE. All values are + * given in byte-offset form, so they do not imply a sector size. The + * SBLOCKSEARCH specifies the order in which the locations should be searched. 
+ */ +#define SBLOCK_FLOPPY 0 +#define SBLOCK_UFS1 8192 +#define SBLOCK_UFS2 65536 +#define SBLOCK_PIGGY 262144 +#define SBLOCKSIZE 8192 +#define SBLOCKSEARCH \ + { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 } + + +/* HP specific MAGIC values */ + +#define UFS_MAGIC_LFN 0x00095014 /* fs supports filenames > 14 chars */ +#define UFS_CIGAM_LFN 0x14500900 /* srahc 41 < semanelif stroppus sf */ + +#define UFS_MAGIC_SEC 0x00612195 /* B1 security fs */ +#define UFS_CIGAM_SEC 0x95216100 + +#define UFS_MAGIC_FEA 0x00195612 /* fs_featurebits supported */ +#define UFS_CIGAM_FEA 0x12561900 + +#define UFS_MAGIC_4GB 0x05231994 /* fs > 4 GB && fs_featurebits */ +#define UFS_CIGAM_4GB 0x94192305 + +/* Seems somebody at HP goofed here. B1 and lfs are both 0x2 !?! */ +#define UFS_FSF_LFN 0x00000001 /* long file names */ +#define UFS_FSF_B1 0x00000002 /* B1 security */ +#define UFS_FSF_LFS 0x00000002 /* large files */ +#define UFS_FSF_LUID 0x00000004 /* large UIDs */ + +/* End of HP stuff */ + + +#define UFS_BSIZE 8192 +#define UFS_MINBSIZE 4096 +#define UFS_FSIZE 1024 +#define UFS_MAXFRAG (UFS_BSIZE / UFS_FSIZE) + +#define UFS_NDADDR 12 +#define UFS_NINDIR 3 + +#define UFS_IND_BLOCK (UFS_NDADDR + 0) +#define UFS_DIND_BLOCK (UFS_NDADDR + 1) +#define UFS_TIND_BLOCK (UFS_NDADDR + 2) + +#define UFS_NDIR_FRAGMENT (UFS_NDADDR << uspi->s_fpbshift) +#define UFS_IND_FRAGMENT (UFS_IND_BLOCK << uspi->s_fpbshift) +#define UFS_DIND_FRAGMENT (UFS_DIND_BLOCK << uspi->s_fpbshift) +#define UFS_TIND_FRAGMENT (UFS_TIND_BLOCK << uspi->s_fpbshift) + +#define UFS_ROOTINO 2 +#define UFS_FIRST_INO (UFS_ROOTINO + 1) + +#define UFS_USEEFT ((__u16)65535) + +#define UFS_FSOK 0x7c269d38 +#define UFS_FSACTIVE ((__s8)0x00) +#define UFS_FSCLEAN ((__s8)0x01) +#define UFS_FSSTABLE ((__s8)0x02) +#define UFS_FSOSF1 ((__s8)0x03) /* is this correct for DEC OSF/1? 
*/ +#define UFS_FSBAD ((__s8)0xff) + +/* From here to next blank line, s_flags for ufs_sb_info */ +/* directory entry encoding */ +#define UFS_DE_MASK 0x00000010 /* mask for the following */ +#define UFS_DE_OLD 0x00000000 +#define UFS_DE_44BSD 0x00000010 +/* uid encoding */ +#define UFS_UID_MASK 0x00000060 /* mask for the following */ +#define UFS_UID_OLD 0x00000000 +#define UFS_UID_44BSD 0x00000020 +#define UFS_UID_EFT 0x00000040 +/* superblock state encoding */ +#define UFS_ST_MASK 0x00000700 /* mask for the following */ +#define UFS_ST_OLD 0x00000000 +#define UFS_ST_44BSD 0x00000100 +#define UFS_ST_SUN 0x00000200 /* Solaris */ +#define UFS_ST_SUNOS 0x00000300 +#define UFS_ST_SUNx86 0x00000400 /* Solaris x86 */ +/*cylinder group encoding */ +#define UFS_CG_MASK 0x00003000 /* mask for the following */ +#define UFS_CG_OLD 0x00000000 +#define UFS_CG_44BSD 0x00002000 +#define UFS_CG_SUN 0x00001000 +/* filesystem type encoding */ +#define UFS_TYPE_MASK 0x00010000 /* mask for the following */ +#define UFS_TYPE_UFS1 0x00000000 +#define UFS_TYPE_UFS2 0x00010000 + + +/* fs_inodefmt options */ +#define UFS_42INODEFMT -1 +#define UFS_44INODEFMT 2 + +/* + * MINFREE gives the minimum acceptable percentage of file system + * blocks which may be free. If the freelist drops below this level + * only the superuser may continue to allocate blocks. This may + * be set to 0 if no reserve of free blocks is deemed necessary, + * however throughput drops by fifty percent if the file system + * is run at between 95% and 100% full; thus the minimum default + * value of fs_minfree is 5%. However, to get good clustering + * performance, 10% is a better choice. hence we use 10% as our + * default value. With 10% free space, fragmentation is not a + * problem, so we choose to optimize for time. + */ +#define UFS_MINFREE 5 +#define UFS_DEFAULTOPT UFS_OPTTIME + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. 
+ */ +#define ufs_fsbtodb(uspi, b) ((b) << (uspi)->s_fsbtodb) +#define ufs_dbtofsb(uspi, b) ((b) >> (uspi)->s_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. + * They calc file system addresses of cylinder group data structures. + */ +#define ufs_cgbase(c) (uspi->s_fpg * (c)) +#define ufs_cgstart(c) ((uspi)->fs_magic == UFS2_MAGIC ? ufs_cgbase(c) : \ + (ufs_cgbase(c) + uspi->s_cgoffset * ((c) & ~uspi->s_cgmask))) +#define ufs_cgsblock(c) (ufs_cgstart(c) + uspi->s_sblkno) /* super blk */ +#define ufs_cgcmin(c) (ufs_cgstart(c) + uspi->s_cblkno) /* cg block */ +#define ufs_cgimin(c) (ufs_cgstart(c) + uspi->s_iblkno) /* inode blk */ +#define ufs_cgdmin(c) (ufs_cgstart(c) + uspi->s_dblkno) /* 1st data */ + +/* + * Macros for handling inode numbers: + * inode number to file system block offset. + * inode number to cylinder group number. + * inode number to file system block address. + */ +#define ufs_inotocg(x) ((x) / uspi->s_ipg) +#define ufs_inotocgoff(x) ((x) % uspi->s_ipg) +#define ufs_inotofsba(x) (((u64)ufs_cgimin(ufs_inotocg(x))) + ufs_inotocgoff(x) / uspi->s_inopf) +#define ufs_inotofsbo(x) ((x) % uspi->s_inopf) + +/* + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define ufs_cbtocylno(bno) \ + ((bno) * uspi->s_nspf / uspi->s_spc) +#define ufs_cbtorpos(bno) \ + ((((bno) * uspi->s_nspf % uspi->s_spc / uspi->s_nsect \ + * uspi->s_trackskew + (bno) * uspi->s_nspf % uspi->s_spc \ + % uspi->s_nsect * uspi->s_interleave) % uspi->s_nsect \ + * uspi->s_nrpos) / uspi->s_npsect) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
+ */ +#define ufs_blkoff(loc) ((loc) & uspi->s_qbmask) +#define ufs_fragoff(loc) ((loc) & uspi->s_qfmask) +#define ufs_lblktosize(blk) ((blk) << uspi->s_bshift) +#define ufs_lblkno(loc) ((loc) >> uspi->s_bshift) +#define ufs_numfrags(loc) ((loc) >> uspi->s_fshift) +#define ufs_blkroundup(size) (((size) + uspi->s_qbmask) & uspi->s_bmask) +#define ufs_fragroundup(size) (((size) + uspi->s_qfmask) & uspi->s_fmask) +#define ufs_fragstoblks(frags) ((frags) >> uspi->s_fpbshift) +#define ufs_blkstofrags(blks) ((blks) << uspi->s_fpbshift) +#define ufs_fragnum(fsb) ((fsb) & uspi->s_fpbmask) +#define ufs_blknum(fsb) ((fsb) & ~uspi->s_fpbmask) + +#define UFS_MAXNAMLEN 255 +#define UFS_MAXMNTLEN 512 +#define UFS2_MAXMNTLEN 468 +#define UFS2_MAXVOLLEN 32 +#define UFS_MAXCSBUFS 31 +#define UFS_LINK_MAX 32000 +/* +#define UFS2_NOCSPTRS ((128 / sizeof(void *)) - 4) +*/ +#define UFS2_NOCSPTRS 28 + +/* + * UFS_DIR_PAD defines the directory entries boundaries + * (must be a multiple of 4) + */ +#define UFS_DIR_PAD 4 +#define UFS_DIR_ROUND (UFS_DIR_PAD - 1) +#define UFS_DIR_REC_LEN(name_len) (((name_len) + 1 + 8 + UFS_DIR_ROUND) & ~UFS_DIR_ROUND) + +struct ufs_timeval { + __fs32 tv_sec; + __fs32 tv_usec; +}; + +struct ufs_dir_entry { + __fs32 d_ino; /* inode number of this entry */ + __fs16 d_reclen; /* length of this entry */ + union { + __fs16 d_namlen; /* actual length of d_name */ + struct { + __u8 d_type; /* file type */ + __u8 d_namlen; /* length of string in d_name */ + } d_44; + } d_u; + __u8 d_name[UFS_MAXNAMLEN + 1]; /* file name */ +}; + +struct ufs_csum { + __fs32 cs_ndir; /* number of directories */ + __fs32 cs_nbfree; /* number of free blocks */ + __fs32 cs_nifree; /* number of free inodes */ + __fs32 cs_nffree; /* number of free frags */ +}; +struct ufs2_csum_total { + __fs64 cs_ndir; /* number of directories */ + __fs64 cs_nbfree; /* number of free blocks */ + __fs64 cs_nifree; /* number of free inodes */ + __fs64 cs_nffree; /* number of free frags */ + __fs64 
cs_numclusters; /* number of free clusters */ + __fs64 cs_spare[3]; /* future expansion */ +}; + +struct ufs_csum_core { + __u64 cs_ndir; /* number of directories */ + __u64 cs_nbfree; /* number of free blocks */ + __u64 cs_nifree; /* number of free inodes */ + __u64 cs_nffree; /* number of free frags */ + __u64 cs_numclusters; /* number of free clusters */ +}; + +/* + * File system flags + */ +#define UFS_UNCLEAN 0x01 /* file system not clean at mount (unused) */ +#define UFS_DOSOFTDEP 0x02 /* file system using soft dependencies */ +#define UFS_NEEDSFSCK 0x04 /* needs sync fsck (FreeBSD compat, unused) */ +#define UFS_INDEXDIRS 0x08 /* kernel supports indexed directories */ +#define UFS_ACLS 0x10 /* file system has ACLs enabled */ +#define UFS_MULTILABEL 0x20 /* file system is MAC multi-label */ +#define UFS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */ + +#if 0 +/* + * This is the actual superblock, as it is laid out on the disk. + * Do NOT use this structure, because of sizeof(ufs_super_block) > 512 and + * it may occupy several blocks, use + * struct ufs_super_block_(first,second,third) instead. 
+ */ +struct ufs_super_block { + union { + struct { + __fs32 fs_link; /* UNUSED */ + } fs_42; + struct { + __fs32 fs_state; /* file system state flag */ + } fs_sun; + } fs_u0; + __fs32 fs_rlink; /* UNUSED */ + __fs32 fs_sblkno; /* addr of super-block in filesys */ + __fs32 fs_cblkno; /* offset of cyl-block in filesys */ + __fs32 fs_iblkno; /* offset of inode-blocks in filesys */ + __fs32 fs_dblkno; /* offset of first data after cg */ + __fs32 fs_cgoffset; /* cylinder group offset in cylinder */ + __fs32 fs_cgmask; /* used to calc mod fs_ntrak */ + __fs32 fs_time; /* last time written -- time_t */ + __fs32 fs_size; /* number of blocks in fs */ + __fs32 fs_dsize; /* number of data blocks in fs */ + __fs32 fs_ncg; /* number of cylinder groups */ + __fs32 fs_bsize; /* size of basic blocks in fs */ + __fs32 fs_fsize; /* size of frag blocks in fs */ + __fs32 fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + __fs32 fs_minfree; /* minimum percentage of free blocks */ + __fs32 fs_rotdelay; /* num of ms for optimal next block */ + __fs32 fs_rps; /* disk revolutions per second */ +/* these fields can be computed from the others */ + __fs32 fs_bmask; /* ``blkoff'' calc of blk offsets */ + __fs32 fs_fmask; /* ``fragoff'' calc of frag offsets */ + __fs32 fs_bshift; /* ``lblkno'' calc of logical blkno */ + __fs32 fs_fshift; /* ``numfrags'' calc number of frags */ +/* these are configuration parameters */ + __fs32 fs_maxcontig; /* max number of contiguous blks */ + __fs32 fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + __fs32 fs_fragshift; /* block to frag shift */ + __fs32 fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + __fs32 fs_sbsize; /* actual size of super block */ + __fs32 fs_csmask; /* csum block offset */ + __fs32 fs_csshift; /* csum block number */ + __fs32 fs_nindir; /* value of NINDIR */ + __fs32 fs_inopb; /* value of INOPB */ + __fs32 fs_nspf; /* value of NSPF */ 
+/* yet another configuration parameter */ + __fs32 fs_optim; /* optimization preference, see below */ +/* these fields are derived from the hardware */ + union { + struct { + __fs32 fs_npsect; /* # sectors/track including spares */ + } fs_sun; + struct { + __fs32 fs_state; /* file system state time stamp */ + } fs_sunx86; + } fs_u1; + __fs32 fs_interleave; /* hardware sector interleave */ + __fs32 fs_trackskew; /* sector 0 skew, per track */ +/* a unique id for this filesystem (currently unused and unmaintained) */ +/* In 4.3 Tahoe this space is used by fs_headswitch and fs_trkseek */ +/* Neither of those fields is used in the Tahoe code right now but */ +/* there could be problems if they are. */ + __fs32 fs_id[2]; /* file system id */ +/* sizes determined by number of cylinder groups and their sizes */ + __fs32 fs_csaddr; /* blk addr of cyl grp summary area */ + __fs32 fs_cssize; /* size of cyl grp summary area */ + __fs32 fs_cgsize; /* cylinder group size */ +/* these fields are derived from the hardware */ + __fs32 fs_ntrak; /* tracks per cylinder */ + __fs32 fs_nsect; /* sectors per track */ + __fs32 fs_spc; /* sectors per cylinder */ +/* this comes from the disk driver partitioning */ + __fs32 fs_ncyl; /* cylinders in file system */ +/* these fields can be computed from the others */ + __fs32 fs_cpg; /* cylinders per group */ + __fs32 fs_ipg; /* inodes per cylinder group */ + __fs32 fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct ufs_csum fs_cstotal; /* cylinder summary information */ +/* these fields are cleared at mount time */ + __s8 fs_fmod; /* super block modified flag */ + __s8 fs_clean; /* file system is clean flag */ + __s8 fs_ronly; /* mounted read-only flag */ + __s8 fs_flags; + union { + struct { + __s8 fs_fsmnt[UFS_MAXMNTLEN];/* name mounted on */ + __fs32 fs_cgrotor; /* last cg searched */ + __fs32 fs_csp[UFS_MAXCSBUFS];/*list of fs_cs info buffers */ + __fs32 fs_maxcluster; + __fs32 fs_cpc; 
/* cyl per cycle in postbl */ + __fs16 fs_opostbl[16][8]; /* old rotation block list head */ + } fs_u1; + struct { + __s8 fs_fsmnt[UFS2_MAXMNTLEN]; /* name mounted on */ + __u8 fs_volname[UFS2_MAXVOLLEN]; /* volume name */ + __fs64 fs_swuid; /* system-wide uid */ + __fs32 fs_pad; /* due to alignment of fs_swuid */ + __fs32 fs_cgrotor; /* last cg searched */ + __fs32 fs_ocsp[UFS2_NOCSPTRS]; /*list of fs_cs info buffers */ + __fs32 fs_contigdirs;/*# of contiguously allocated dirs */ + __fs32 fs_csp; /* cg summary info buffer for fs_cs */ + __fs32 fs_maxcluster; + __fs32 fs_active;/* used by snapshots to track fs */ + __fs32 fs_old_cpc; /* cyl per cycle in postbl */ + __fs32 fs_maxbsize;/*maximum blocking factor permitted */ + __fs64 fs_sparecon64[17];/*old rotation block list head */ + __fs64 fs_sblockloc; /* byte offset of standard superblock */ + struct ufs2_csum_total fs_cstotal;/*cylinder summary information*/ + struct ufs_timeval fs_time; /* last time written */ + __fs64 fs_size; /* number of blocks in fs */ + __fs64 fs_dsize; /* number of data blocks in fs */ + __fs64 fs_csaddr; /* blk addr of cyl grp summary area */ + __fs64 fs_pendingblocks;/* blocks in process of being freed */ + __fs32 fs_pendinginodes;/*inodes in process of being freed */ + } fs_u2; + } fs_u11; + union { + struct { + __fs32 fs_sparecon[53];/* reserved for future constants */ + __fs32 fs_reclaim; + __fs32 fs_sparecon2[1]; + __fs32 fs_state; /* file system state time stamp */ + __fs32 fs_qbmask[2]; /* ~usb_bmask */ + __fs32 fs_qfmask[2]; /* ~usb_fmask */ + } fs_sun; + struct { + __fs32 fs_sparecon[53];/* reserved for future constants */ + __fs32 fs_reclaim; + __fs32 fs_sparecon2[1]; + __fs32 fs_npsect; /* # sectors/track including spares */ + __fs32 fs_qbmask[2]; /* ~usb_bmask */ + __fs32 fs_qfmask[2]; /* ~usb_fmask */ + } fs_sunx86; + struct { + __fs32 fs_sparecon[50];/* reserved for future constants */ + __fs32 fs_contigsumsize;/* size of cluster summary array */ + __fs32 
fs_maxsymlinklen;/* max length of an internal symlink */ + __fs32 fs_inodefmt; /* format of on-disk inodes */ + __fs32 fs_maxfilesize[2]; /* max representable file size */ + __fs32 fs_qbmask[2]; /* ~usb_bmask */ + __fs32 fs_qfmask[2]; /* ~usb_fmask */ + __fs32 fs_state; /* file system state time stamp */ + } fs_44; + } fs_u2; + __fs32 fs_postblformat; /* format of positional layout tables */ + __fs32 fs_nrpos; /* number of rotational positions */ + __fs32 fs_postbloff; /* (__s16) rotation block list head */ + __fs32 fs_rotbloff; /* (__u8) blocks for each rotation */ + __fs32 fs_magic; /* magic number */ + __u8 fs_space[1]; /* list of blocks for each rotation */ +}; +#endif/*struct ufs_super_block*/ + +/* + * Preference for optimization. + */ +#define UFS_OPTTIME 0 /* minimize allocation time */ +#define UFS_OPTSPACE 1 /* minimize disk fragmentation */ + +/* + * Rotational layout table format types + */ +#define UFS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ +#define UFS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ + +/* + * Convert cylinder group to base address of its global summary info. + */ +#define fs_cs(indx) s_csp[(indx)] + +/* + * Cylinder group block for a file system. + * + * Writable fields in the cylinder group are protected by the associated + * super block lock fs->fs_lock. 
+ */ +#define CG_MAGIC 0x090255 +#define ufs_cg_chkmagic(sb, ucg) \ + (fs32_to_cpu((sb), (ucg)->cg_magic) == CG_MAGIC) +/* + * Macros for access to old cylinder group array structures + */ +#define ufs_ocg_blktot(sb, ucg) fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_btot) +#define ufs_ocg_blks(sb, ucg, cylno) fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_b[cylno]) +#define ufs_ocg_inosused(sb, ucg) fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_iused) +#define ufs_ocg_blksfree(sb, ucg) fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_free) +#define ufs_ocg_chkmagic(sb, ucg) \ + (fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_magic) == CG_MAGIC) + +/* + * size of this structure is 172 B + */ +struct ufs_cylinder_group { + __fs32 cg_link; /* linked list of cyl groups */ + __fs32 cg_magic; /* magic number */ + __fs32 cg_time; /* time last written */ + __fs32 cg_cgx; /* we are the cgx'th cylinder group */ + __fs16 cg_ncyl; /* number of cyl's this cg */ + __fs16 cg_niblk; /* number of inode blocks this cg */ + __fs32 cg_ndblk; /* number of data blocks this cg */ + struct ufs_csum cg_cs; /* cylinder summary information */ + __fs32 cg_rotor; /* position of last used block */ + __fs32 cg_frotor; /* position of last used frag */ + __fs32 cg_irotor; /* position of last used inode */ + __fs32 cg_frsum[UFS_MAXFRAG]; /* counts of available frags */ + __fs32 cg_btotoff; /* (__u32) block totals per cylinder */ + __fs32 cg_boff; /* (short) free block positions */ + __fs32 cg_iusedoff; /* (char) used inode map */ + __fs32 cg_freeoff; /* (u_char) free block map */ + __fs32 cg_nextfreeoff; /* (u_char) next available space */ + union { + struct { + __fs32 cg_clustersumoff; /* (u_int32) counts of avail clusters */ + __fs32 cg_clusteroff; /* (u_int8) free cluster map */ + __fs32 cg_nclusterblks; /* number of clusters this cg */ + __fs32 cg_sparecon[13]; /* reserved for future use */ + } cg_44; + struct { + 
__fs32 cg_clustersumoff;/* (u_int32) counts of avail clusters */ + __fs32 cg_clusteroff; /* (u_int8) free cluster map */ + __fs32 cg_nclusterblks;/* number of clusters this cg */ + __fs32 cg_niblk; /* number of inode blocks this cg */ + __fs32 cg_initediblk; /* last initialized inode */ + __fs32 cg_sparecon32[3];/* reserved for future use */ + __fs64 cg_time; /* time last written */ + __fs64 cg_sparecon[3]; /* reserved for future use */ + } cg_u2; + __fs32 cg_sparecon[16]; /* reserved for future use */ + } cg_u; + __u8 cg_space[1]; /* space for cylinder group maps */ +/* actually longer */ +}; + +/* Historic Cylinder group info */ +struct ufs_old_cylinder_group { + __fs32 cg_link; /* linked list of cyl groups */ + __fs32 cg_rlink; /* for incore cyl groups */ + __fs32 cg_time; /* time last written */ + __fs32 cg_cgx; /* we are the cgx'th cylinder group */ + __fs16 cg_ncyl; /* number of cyl's this cg */ + __fs16 cg_niblk; /* number of inode blocks this cg */ + __fs32 cg_ndblk; /* number of data blocks this cg */ + struct ufs_csum cg_cs; /* cylinder summary information */ + __fs32 cg_rotor; /* position of last used block */ + __fs32 cg_frotor; /* position of last used frag */ + __fs32 cg_irotor; /* position of last used inode */ + __fs32 cg_frsum[8]; /* counts of available frags */ + __fs32 cg_btot[32]; /* block totals per cylinder */ + __fs16 cg_b[32][8]; /* positions of free blocks */ + __u8 cg_iused[256]; /* used inode map */ + __fs32 cg_magic; /* magic number */ + __u8 cg_free[1]; /* free block map */ +/* actually longer */ +}; + +/* + * structure of an on-disk inode + */ +struct ufs_inode { + __fs16 ui_mode; /* 0x0 */ + __fs16 ui_nlink; /* 0x2 */ + union { + struct { + __fs16 ui_suid; /* 0x4 */ + __fs16 ui_sgid; /* 0x6 */ + } oldids; + __fs32 ui_inumber; /* 0x4 lsf: inode number */ + __fs32 ui_author; /* 0x4 GNU HURD: author */ + } ui_u1; + __fs64 ui_size; /* 0x8 */ + struct ufs_timeval ui_atime; /* 0x10 access */ + struct ufs_timeval ui_mtime; /* 0x18 
modification */ + struct ufs_timeval ui_ctime; /* 0x20 creation */ + union { + struct { + __fs32 ui_db[UFS_NDADDR];/* 0x28 data blocks */ + __fs32 ui_ib[UFS_NINDIR];/* 0x58 indirect blocks */ + } ui_addr; + __u8 ui_symlink[4*(UFS_NDADDR+UFS_NINDIR)];/* 0x28 fast symlink */ + } ui_u2; + __fs32 ui_flags; /* 0x64 immutable, append-only... */ + __fs32 ui_blocks; /* 0x68 blocks in use */ + __fs32 ui_gen; /* 0x6c like ext2 i_version, for NFS support */ + union { + struct { + __fs32 ui_shadow; /* 0x70 shadow inode with security data */ + __fs32 ui_uid; /* 0x74 long EFT version of uid */ + __fs32 ui_gid; /* 0x78 long EFT version of gid */ + __fs32 ui_oeftflag; /* 0x7c reserved */ + } ui_sun; + struct { + __fs32 ui_uid; /* 0x70 File owner */ + __fs32 ui_gid; /* 0x74 File group */ + __fs32 ui_spare[2]; /* 0x78 reserved */ + } ui_44; + struct { + __fs32 ui_uid; /* 0x70 */ + __fs32 ui_gid; /* 0x74 */ + __fs16 ui_modeh; /* 0x78 mode high bits */ + __fs16 ui_spare; /* 0x7A unused */ + __fs32 ui_trans; /* 0x7c filesystem translator */ + } ui_hurd; + } ui_u3; +}; + +#define UFS_NXADDR 2 /* External addresses in inode. */ +struct ufs2_inode { + __fs16 ui_mode; /* 0: IFMT, permissions; see below. */ + __fs16 ui_nlink; /* 2: File link count. */ + __fs32 ui_uid; /* 4: File owner. */ + __fs32 ui_gid; /* 8: File group. */ + __fs32 ui_blksize; /* 12: Inode blocksize. */ + __fs64 ui_size; /* 16: File byte count. */ + __fs64 ui_blocks; /* 24: Bytes actually held. */ + __fs64 ui_atime; /* 32: Last access time. */ + __fs64 ui_mtime; /* 40: Last modified time. */ + __fs64 ui_ctime; /* 48: Last inode change time. */ + __fs64 ui_birthtime; /* 56: Inode creation time. */ + __fs32 ui_mtimensec; /* 64: Last modified time. */ + __fs32 ui_atimensec; /* 68: Last access time. */ + __fs32 ui_ctimensec; /* 72: Last inode change time. */ + __fs32 ui_birthnsec; /* 76: Inode creation time. */ + __fs32 ui_gen; /* 80: Generation number. */ + __fs32 ui_kernflags; /* 84: Kernel flags. 
*/ + __fs32 ui_flags; /* 88: Status flags (chflags). */ + __fs32 ui_extsize; /* 92: External attributes block. */ + __fs64 ui_extb[UFS_NXADDR];/* 96: External attributes block. */ + union { + struct { + __fs64 ui_db[UFS_NDADDR]; /* 112: Direct disk blocks. */ + __fs64 ui_ib[UFS_NINDIR];/* 208: Indirect disk blocks.*/ + } ui_addr; + __u8 ui_symlink[2*4*(UFS_NDADDR+UFS_NINDIR)];/* 0x28 fast symlink */ + } ui_u2; + __fs64 ui_spare[3]; /* 232: Reserved; currently unused */ +}; + + +/* FreeBSD has these in sys/stat.h */ +/* ui_flags that can be set by a file owner */ +#define UFS_UF_SETTABLE 0x0000ffff +#define UFS_UF_NODUMP 0x00000001 /* do not dump */ +#define UFS_UF_IMMUTABLE 0x00000002 /* immutable (can't "change") */ +#define UFS_UF_APPEND 0x00000004 /* append-only */ +#define UFS_UF_OPAQUE 0x00000008 /* directory is opaque (unionfs) */ +#define UFS_UF_NOUNLINK 0x00000010 /* can't be removed or renamed */ +/* ui_flags that only root can set */ +#define UFS_SF_SETTABLE 0xffff0000 +#define UFS_SF_ARCHIVED 0x00010000 /* archived */ +#define UFS_SF_IMMUTABLE 0x00020000 /* immutable (can't "change") */ +#define UFS_SF_APPEND 0x00040000 /* append-only */ +#define UFS_SF_NOUNLINK 0x00100000 /* can't be removed or renamed */ + +/* + * This structure is used for reading disk structures larger + * than the size of fragment. 
+ */ +struct ufs_buffer_head { + __u64 fragment; /* first fragment */ + __u64 count; /* number of fragments */ + struct buffer_head * bh[UFS_MAXFRAG]; /* buffers */ +}; + +struct ufs_cg_private_info { + struct ufs_buffer_head c_ubh; + __u32 c_cgx; /* number of cylidner group */ + __u16 c_ncyl; /* number of cyl's this cg */ + __u16 c_niblk; /* number of inode blocks this cg */ + __u32 c_ndblk; /* number of data blocks this cg */ + __u32 c_rotor; /* position of last used block */ + __u32 c_frotor; /* position of last used frag */ + __u32 c_irotor; /* position of last used inode */ + __u32 c_btotoff; /* (__u32) block totals per cylinder */ + __u32 c_boff; /* (short) free block positions */ + __u32 c_iusedoff; /* (char) used inode map */ + __u32 c_freeoff; /* (u_char) free block map */ + __u32 c_nextfreeoff; /* (u_char) next available space */ + __u32 c_clustersumoff;/* (u_int32) counts of avail clusters */ + __u32 c_clusteroff; /* (u_int8) free cluster map */ + __u32 c_nclusterblks; /* number of clusters this cg */ +}; + + +struct ufs_sb_private_info { + struct ufs_buffer_head s_ubh; /* buffer containing super block */ + struct ufs_csum_core cs_total; + __u32 s_sblkno; /* offset of super-blocks in filesys */ + __u32 s_cblkno; /* offset of cg-block in filesys */ + __u32 s_iblkno; /* offset of inode-blocks in filesys */ + __u32 s_dblkno; /* offset of first data after cg */ + __u32 s_cgoffset; /* cylinder group offset in cylinder */ + __u32 s_cgmask; /* used to calc mod fs_ntrak */ + __u32 s_size; /* number of blocks (fragments) in fs */ + __u32 s_dsize; /* number of data blocks in fs */ + __u64 s_u2_size; /* ufs2: number of blocks (fragments) in fs */ + __u64 s_u2_dsize; /*ufs2: number of data blocks in fs */ + __u32 s_ncg; /* number of cylinder groups */ + __u32 s_bsize; /* size of basic blocks */ + __u32 s_fsize; /* size of fragments */ + __u32 s_fpb; /* fragments per block */ + __u32 s_minfree; /* minimum percentage of free blocks */ + __u32 s_bmask; /* `blkoff'' 
calc of blk offsets */ + __u32 s_fmask; /* s_fsize mask */ + __u32 s_bshift; /* `lblkno'' calc of logical blkno */ + __u32 s_fshift; /* s_fsize shift */ + __u32 s_fpbshift; /* fragments per block shift */ + __u32 s_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + __u32 s_sbsize; /* actual size of super block */ + __u32 s_csmask; /* csum block offset */ + __u32 s_csshift; /* csum block number */ + __u32 s_nindir; /* value of NINDIR */ + __u32 s_inopb; /* value of INOPB */ + __u32 s_nspf; /* value of NSPF */ + __u32 s_npsect; /* # sectors/track including spares */ + __u32 s_interleave; /* hardware sector interleave */ + __u32 s_trackskew; /* sector 0 skew, per track */ + __u64 s_csaddr; /* blk addr of cyl grp summary area */ + __u32 s_cssize; /* size of cyl grp summary area */ + __u32 s_cgsize; /* cylinder group size */ + __u32 s_ntrak; /* tracks per cylinder */ + __u32 s_nsect; /* sectors per track */ + __u32 s_spc; /* sectors per cylinder */ + __u32 s_ipg; /* inodes per cylinder group */ + __u32 s_fpg; /* fragments per group */ + __u32 s_cpc; /* cyl per cycle in postbl */ + __s32 s_contigsumsize;/* size of cluster summary array, 44bsd */ + __s64 s_qbmask; /* ~usb_bmask */ + __s64 s_qfmask; /* ~usb_fmask */ + __s32 s_postblformat; /* format of positional layout tables */ + __s32 s_nrpos; /* number of rotational positions */ + __s32 s_postbloff; /* (__s16) rotation block list head */ + __s32 s_rotbloff; /* (__u8) blocks for each rotation */ + + __u32 s_fpbmask; /* fragments per block mask */ + __u32 s_apb; /* address per block */ + __u32 s_2apb; /* address per block^2 */ + __u32 s_3apb; /* address per block^3 */ + __u32 s_apbmask; /* address per block mask */ + __u32 s_apbshift; /* address per block shift */ + __u32 s_2apbshift; /* address per block shift * 2 */ + __u32 s_3apbshift; /* address per block shift * 3 */ + __u32 s_nspfshift; /* number of sector per fragment shift */ + __u32 s_nspb; /* number of sector per block */ + __u32 s_inopf; /* inodes per 
fragment */ + __u32 s_sbbase; /* offset of NeXTstep superblock */ + __u32 s_bpf; /* bits per fragment */ + __u32 s_bpfshift; /* bits per fragment shift*/ + __u32 s_bpfmask; /* bits per fragment mask */ + + __u32 s_maxsymlinklen;/* upper limit on fast symlinks' size */ + __s32 fs_magic; /* filesystem magic */ + unsigned int s_dirblksize; +}; + +/* + * Sizes of this structures are: + * ufs_super_block_first 512 + * ufs_super_block_second 512 + * ufs_super_block_third 356 + */ +struct ufs_super_block_first { + union { + struct { + __fs32 fs_link; /* UNUSED */ + } fs_42; + struct { + __fs32 fs_state; /* file system state flag */ + } fs_sun; + } fs_u0; + __fs32 fs_rlink; + __fs32 fs_sblkno; + __fs32 fs_cblkno; + __fs32 fs_iblkno; + __fs32 fs_dblkno; + __fs32 fs_cgoffset; + __fs32 fs_cgmask; + __fs32 fs_time; + __fs32 fs_size; + __fs32 fs_dsize; + __fs32 fs_ncg; + __fs32 fs_bsize; + __fs32 fs_fsize; + __fs32 fs_frag; + __fs32 fs_minfree; + __fs32 fs_rotdelay; + __fs32 fs_rps; + __fs32 fs_bmask; + __fs32 fs_fmask; + __fs32 fs_bshift; + __fs32 fs_fshift; + __fs32 fs_maxcontig; + __fs32 fs_maxbpg; + __fs32 fs_fragshift; + __fs32 fs_fsbtodb; + __fs32 fs_sbsize; + __fs32 fs_csmask; + __fs32 fs_csshift; + __fs32 fs_nindir; + __fs32 fs_inopb; + __fs32 fs_nspf; + __fs32 fs_optim; + union { + struct { + __fs32 fs_npsect; + } fs_sun; + struct { + __fs32 fs_state; + } fs_sunx86; + } fs_u1; + __fs32 fs_interleave; + __fs32 fs_trackskew; + __fs32 fs_id[2]; + __fs32 fs_csaddr; + __fs32 fs_cssize; + __fs32 fs_cgsize; + __fs32 fs_ntrak; + __fs32 fs_nsect; + __fs32 fs_spc; + __fs32 fs_ncyl; + __fs32 fs_cpg; + __fs32 fs_ipg; + __fs32 fs_fpg; + struct ufs_csum fs_cstotal; + __s8 fs_fmod; + __s8 fs_clean; + __s8 fs_ronly; + __s8 fs_flags; + __s8 fs_fsmnt[UFS_MAXMNTLEN - 212]; + +}; + +struct ufs_super_block_second { + union { + struct { + __s8 fs_fsmnt[212]; + __fs32 fs_cgrotor; + __fs32 fs_csp[UFS_MAXCSBUFS]; + __fs32 fs_maxcluster; + __fs32 fs_cpc; + __fs16 fs_opostbl[82]; + } fs_u1; + 
struct { + __s8 fs_fsmnt[UFS2_MAXMNTLEN - UFS_MAXMNTLEN + 212]; + __u8 fs_volname[UFS2_MAXVOLLEN]; + __fs64 fs_swuid; + __fs32 fs_pad; + __fs32 fs_cgrotor; + __fs32 fs_ocsp[UFS2_NOCSPTRS]; + __fs32 fs_contigdirs; + __fs32 fs_csp; + __fs32 fs_maxcluster; + __fs32 fs_active; + __fs32 fs_old_cpc; + __fs32 fs_maxbsize; + __fs64 fs_sparecon64[17]; + __fs64 fs_sblockloc; + __fs64 cs_ndir; + __fs64 cs_nbfree; + } fs_u2; + } fs_un; +}; + +struct ufs_super_block_third { + union { + struct { + __fs16 fs_opostbl[46]; + } fs_u1; + struct { + __fs64 cs_nifree; /* number of free inodes */ + __fs64 cs_nffree; /* number of free frags */ + __fs64 cs_numclusters; /* number of free clusters */ + __fs64 cs_spare[3]; /* future expansion */ + struct ufs_timeval fs_time; /* last time written */ + __fs64 fs_size; /* number of blocks in fs */ + __fs64 fs_dsize; /* number of data blocks in fs */ + __fs64 fs_csaddr; /* blk addr of cyl grp summary area */ + __fs64 fs_pendingblocks;/* blocks in process of being freed */ + __fs32 fs_pendinginodes;/*inodes in process of being freed */ + } __attribute__ ((packed)) fs_u2; + } fs_un1; + union { + struct { + __fs32 fs_sparecon[53];/* reserved for future constants */ + __fs32 fs_reclaim; + __fs32 fs_sparecon2[1]; + __fs32 fs_state; /* file system state time stamp */ + __fs32 fs_qbmask[2]; /* ~usb_bmask */ + __fs32 fs_qfmask[2]; /* ~usb_fmask */ + } fs_sun; + struct { + __fs32 fs_sparecon[53];/* reserved for future constants */ + __fs32 fs_reclaim; + __fs32 fs_sparecon2[1]; + __fs32 fs_npsect; /* # sectors/track including spares */ + __fs32 fs_qbmask[2]; /* ~usb_bmask */ + __fs32 fs_qfmask[2]; /* ~usb_fmask */ + } fs_sunx86; + struct { + __fs32 fs_sparecon[50];/* reserved for future constants */ + __fs32 fs_contigsumsize;/* size of cluster summary array */ + __fs32 fs_maxsymlinklen;/* max length of an internal symlink */ + __fs32 fs_inodefmt; /* format of on-disk inodes */ + __fs32 fs_maxfilesize[2]; /* max representable file size */ + __fs32 
fs_qbmask[2]; /* ~usb_bmask */ + __fs32 fs_qfmask[2]; /* ~usb_fmask */ + __fs32 fs_state; /* file system state time stamp */ + } fs_44; + } fs_un2; + __fs32 fs_postblformat; + __fs32 fs_nrpos; + __fs32 fs_postbloff; + __fs32 fs_rotbloff; + __fs32 fs_magic; + __u8 fs_space[1]; +}; + +#endif /* __LINUX_UFS_FS_H */ diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 410084dae389..85a7fc9e4a4e 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -8,9 +8,9 @@ #include <linux/string.h> #include <linux/slab.h> -#include <linux/ufs_fs.h> #include <linux/buffer_head.h> +#include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" diff --git a/fs/utimes.c b/fs/utimes.c index b9912ecbee24..b18da9c0b97f 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/stat.h> #include <linux/utime.h> +#include <linux/syscalls.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -83,7 +84,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (error) goto out; - dentry = nd.dentry; + dentry = nd.path.dentry; } inode = dentry->d_inode; @@ -137,7 +138,7 @@ dput_and_out: if (f) fput(f); else - path_release(&nd); + path_put(&nd.path); out: return error; } diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index c28add2fbe95..cd450bea9f1a 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -705,7 +705,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, brelse(sinfo.bh); if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } alias = d_find_alias(inode); if (alias) { diff --git a/fs/xattr.c b/fs/xattr.c index f7c8f87bb390..3acab1615460 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -262,8 +262,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, error = user_path_walk(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); - path_release(&nd); + error = setxattr(nd.path.dentry, 
name, value, size, flags); + path_put(&nd.path); return error; } @@ -277,8 +277,8 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, error = user_path_walk_link(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); - path_release(&nd); + error = setxattr(nd.path.dentry, name, value, size, flags); + path_put(&nd.path); return error; } @@ -347,8 +347,8 @@ sys_getxattr(char __user *path, char __user *name, void __user *value, error = user_path_walk(path, &nd); if (error) return error; - error = getxattr(nd.dentry, name, value, size); - path_release(&nd); + error = getxattr(nd.path.dentry, name, value, size); + path_put(&nd.path); return error; } @@ -362,8 +362,8 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value, error = user_path_walk_link(path, &nd); if (error) return error; - error = getxattr(nd.dentry, name, value, size); - path_release(&nd); + error = getxattr(nd.path.dentry, name, value, size); + path_put(&nd.path); return error; } @@ -421,8 +421,8 @@ sys_listxattr(char __user *path, char __user *list, size_t size) error = user_path_walk(path, &nd); if (error) return error; - error = listxattr(nd.dentry, list, size); - path_release(&nd); + error = listxattr(nd.path.dentry, list, size); + path_put(&nd.path); return error; } @@ -435,8 +435,8 @@ sys_llistxattr(char __user *path, char __user *list, size_t size) error = user_path_walk_link(path, &nd); if (error) return error; - error = listxattr(nd.dentry, list, size); - path_release(&nd); + error = listxattr(nd.path.dentry, list, size); + path_put(&nd.path); return error; } @@ -482,8 +482,8 @@ sys_removexattr(char __user *path, char __user *name) error = user_path_walk(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); - path_release(&nd); + error = removexattr(nd.path.dentry, name); + path_put(&nd.path); return error; } @@ -496,8 +496,8 @@ sys_lremovexattr(char __user *path, char __user *name) error = 
user_path_walk_link(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); - path_release(&nd); + error = removexattr(nd.path.dentry, name); + path_put(&nd.path); return error; } diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index d1491aa7a0e2..97316451fc6d 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -70,7 +70,6 @@ xfs-y += xfs_alloc.o \ xfs_iget.o \ xfs_inode.o \ xfs_inode_item.o \ - xfs_iocore.o \ xfs_iomap.o \ xfs_itable.o \ xfs_dfrag.o \ diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h deleted file mode 100644 index 50a6191178f4..000000000000 --- a/fs/xfs/linux-2.6/spin.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_SPIN_H__ -#define __XFS_SUPPORT_SPIN_H__ - -#include <linux/sched.h> /* preempt needs this */ -#include <linux/spinlock.h> - -/* - * Map lock_t from IRIX to Linux spinlocks. - * - * We do not make use of lock_t from interrupt context, so we do not - * have to worry about disabling interrupts at all (unlike IRIX). 
- */ - -typedef spinlock_t lock_t; - -#define SPLDECL(s) unsigned long s -#ifndef DEFINE_SPINLOCK -#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED -#endif - -#define spinlock_init(lock, name) spin_lock_init(lock) -#define spinlock_destroy(lock) -#define mutex_spinlock(lock) ({ spin_lock(lock); 0; }) -#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0) -#define nested_spinlock(lock) spin_lock(lock) -#define nested_spinunlock(lock) spin_unlock(lock) - -#endif /* __XFS_SUPPORT_SPIN_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 2e34b104107c..e0519529c26c 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -107,6 +107,18 @@ xfs_page_trace( #define xfs_page_trace(tag, inode, page, pgoff) #endif +STATIC struct block_device * +xfs_find_bdev_for_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) + return mp->m_rtdev_targp->bt_bdev; + else + return mp->m_ddev_targp->bt_bdev; +} + /* * Schedule IO completion handling on a xfsdatad if this was * the final hold on this ioend. If we are asked to wait, @@ -151,7 +163,7 @@ xfs_destroy_ioend( /* * Update on-disk file size now that data has been written to disk. * The current in-memory file size is i_size. If a write is beyond - * eof io_new_size will be the intended file size until i_size is + * eof i_new_size will be the intended file size until i_size is * updated. If this write does not extend all the way to the valid * file size then restrict this update to the end of the write. 
*/ @@ -173,7 +185,7 @@ xfs_setfilesize( xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = MAX(ip->i_size, ip->i_iocore.io_new_size); + isize = MAX(ip->i_size, ip->i_new_size); isize = MIN(isize, bsize); if (ip->i_d.di_size < isize) { @@ -226,12 +238,13 @@ xfs_end_bio_unwritten( { xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + struct xfs_inode *ip = XFS_I(ioend->io_inode); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; if (likely(!ioend->io_error)) { - xfs_bmap(XFS_I(ioend->io_inode), offset, size, - BMAPI_UNWRITTEN, NULL, NULL); + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) + xfs_iomap_write_unwritten(ip, offset, size); xfs_setfilesize(ioend); } xfs_destroy_ioend(ioend); @@ -304,7 +317,7 @@ xfs_map_blocks( xfs_inode_t *ip = XFS_I(inode); int error, nmaps = 1; - error = xfs_bmap(ip, offset, count, + error = xfs_iomap(ip, offset, count, flags, mapp, &nmaps); if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) xfs_iflags_set(ip, XFS_IMODIFIED); @@ -1323,7 +1336,7 @@ __xfs_get_blocks( offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); size = bh_result->b_size; - error = xfs_bmap(XFS_I(inode), offset, size, + error = xfs_iomap(XFS_I(inode), offset, size, create ? 
flags : BMAPI_READ, &iomap, &niomap); if (error) return -error; @@ -1471,28 +1484,21 @@ xfs_vm_direct_IO( { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - xfs_iomap_t iomap; - int maps = 1; - int error; + struct block_device *bdev; ssize_t ret; - error = xfs_bmap(XFS_I(inode), offset, 0, - BMAPI_DEVICE, &iomap, &maps); - if (error) - return -error; + bdev = xfs_find_bdev_for_inode(XFS_I(inode)); if (rw == WRITE) { iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - iomap.iomap_target->bt_bdev, - iov, offset, nr_segs, + bdev, iov, offset, nr_segs, xfs_get_blocks_direct, xfs_end_io_direct); } else { iocb->private = xfs_alloc_ioend(inode, IOMAP_READ); ret = blockdev_direct_IO_no_locking(rw, iocb, inode, - iomap.iomap_target->bt_bdev, - iov, offset, nr_segs, + bdev, iov, offset, nr_segs, xfs_get_blocks_direct, xfs_end_io_direct); } @@ -1525,8 +1531,7 @@ xfs_vm_bmap( struct inode *inode = (struct inode *)mapping->host; struct xfs_inode *ip = XFS_I(inode); - vn_trace_entry(XFS_I(inode), __FUNCTION__, - (inst_t *)__return_address); + xfs_itrace_entry(XFS_I(inode)); xfs_rwlock(ip, VRWLOCK_READ); xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); xfs_rwunlock(ip, VRWLOCK_READ); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0382c19d6523..e347bfd47c91 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -387,8 +387,6 @@ _xfs_buf_lookup_pages( if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -418,24 +416,17 @@ _xfs_buf_lookup_pages( ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; - if (blocksize >= PAGE_CACHE_SIZE) { - if (flags & XBF_READ) - bp->b_locked = 1; - } else if (!PagePrivate(page)) { + if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { if (test_page_region(page, 
offset, nbytes)) page_count++; } } + unlock_page(page); bp->b_pages[i] = page; offset = 0; } - if (!bp->b_locked) { - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); - } - if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; @@ -751,7 +742,6 @@ xfs_buf_associate_memory( bp->b_pages[i] = mem_to_page((void *)pageaddr); pageaddr += PAGE_CACHE_SIZE; } - bp->b_locked = 0; bp->b_count_desired = len; bp->b_buffer_length = buflen; @@ -1098,25 +1088,13 @@ xfs_buf_iostart( return status; } -STATIC_INLINE int -_xfs_buf_iolocked( - xfs_buf_t *bp) -{ - ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE)); - if (bp->b_flags & XBF_READ) - return bp->b_locked; - return 0; -} - STATIC_INLINE void _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { - bp->b_locked = 0; + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) xfs_buf_ioend(bp, schedule); - } } STATIC void @@ -1147,10 +1125,6 @@ xfs_buf_bio_end_io( if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - - if (_xfs_buf_iolocked(bp)) { - unlock_page(page); - } } while (bvec >= bio->bi_io_vec); _xfs_buf_ioend(bp, 1); @@ -1161,13 +1135,12 @@ STATIC void _xfs_buf_ioapply( xfs_buf_t *bp) { - int i, rw, map_i, total_nr_pages, nr_pages; + int rw, map_i, total_nr_pages, nr_pages; struct bio *bio; int offset = bp->b_offset; int size = bp->b_count_desired; sector_t sector = bp->b_bn; unsigned int blocksize = bp->b_target->bt_bsize; - int locking = _xfs_buf_iolocked(bp); total_nr_pages = bp->b_page_count; map_i = 0; @@ -1190,7 +1163,7 @@ _xfs_buf_ioapply( * filesystem block size is not smaller than the page size. 
*/ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - (bp->b_flags & XBF_READ) && locking && + (bp->b_flags & XBF_READ) && (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); @@ -1207,24 +1180,6 @@ _xfs_buf_ioapply( goto submit_io; } - /* Lock down the pages which we need to for the request */ - if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) { - for (i = 0; size; i++) { - int nbytes = PAGE_CACHE_SIZE - offset; - struct page *page = bp->b_pages[i]; - - if (nbytes > size) - nbytes = size; - - lock_page(page); - - size -= nbytes; - offset = 0; - } - offset = bp->b_offset; - size = bp->b_count_desired; - } - next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); @@ -1571,7 +1526,7 @@ xfs_alloc_delwrite_queue( INIT_LIST_HEAD(&btp->bt_list); INIT_LIST_HEAD(&btp->bt_delwrite_queue); - spinlock_init(&btp->bt_delwrite_lock, "delwri_lock"); + spin_lock_init(&btp->bt_delwrite_lock); btp->bt_flags = 0; btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); if (IS_ERR(btp->bt_task)) { diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index b5908a34b15d..a3d207de48b8 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -143,7 +143,6 @@ typedef struct xfs_buf { void *b_fspriv2; void *b_fspriv3; unsigned short b_error; /* error code on I/O */ - unsigned short b_locked; /* page array is locked */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ struct page **b_pages; /* array of page pointers */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 15bd4948832c..ca4f66c4de16 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -118,20 +118,29 @@ xfs_nfs_get_inode( u64 ino, u32 generation) { - xfs_fid_t xfid; - bhv_vnode_t *vp; + xfs_mount_t *mp = XFS_M(sb); + xfs_inode_t *ip; int error; - xfid.fid_len = sizeof(xfs_fid_t) - sizeof(xfid.fid_len); - xfid.fid_pad 
= 0; - xfid.fid_ino = ino; - xfid.fid_gen = generation; + /* + * NFS can sometimes send requests for ino 0. Fail them gracefully. + */ + if (ino == 0) + return ERR_PTR(-ESTALE); - error = xfs_vget(XFS_M(sb), &vp, &xfid); + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); if (error) return ERR_PTR(-error); + if (!ip) + return ERR_PTR(-EIO); + + if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return ERR_PTR(-ENOENT); + } - return vp ? vn_to_inode(vp) : NULL; + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return ip->i_vnode; } STATIC struct dentry * diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 21a1c2b1c5fc..edab1ffbb163 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -350,8 +350,8 @@ xfs_file_readdir( size = buf.used; de = (struct hack_dirent *)buf.dirent; - curr_offset = de->offset /* & 0x7fffffff */; while (size > 0) { + curr_offset = de->offset /* & 0x7fffffff */; if (filldir(dirent, de->name, de->namlen, curr_offset & 0x7fffffff, de->ino, de->d_type)) { @@ -362,7 +362,6 @@ xfs_file_readdir( sizeof(u64)); size -= reclen; de = (struct hack_dirent *)((char *)de + reclen); - curr_offset = de->offset /* & 0x7fffffff */; } } diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 9febf9dc999d..ef90e64641e6 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -47,5 +47,6 @@ xfs_param_t xfs_params = { /* * Global system credential structure. 
*/ -cred_t sys_cred_val, *sys_cred = &sys_cred_val; +static cred_t sys_cred_val; +cred_t *sys_cred = &sys_cred_val; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 98a56568bb24..a9952e490ac9 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -75,7 +75,6 @@ xfs_find_handle( xfs_handle_t handle; xfs_fsop_handlereq_t hreq; struct inode *inode; - bhv_vnode_t *vp; if (copy_from_user(&hreq, arg, sizeof(hreq))) return -XFS_ERROR(EFAULT); @@ -92,10 +91,10 @@ xfs_find_handle( if (error) return error; - ASSERT(nd.dentry); - ASSERT(nd.dentry->d_inode); - inode = igrab(nd.dentry->d_inode); - path_release(&nd); + ASSERT(nd.path.dentry); + ASSERT(nd.path.dentry->d_inode); + inode = igrab(nd.path.dentry->d_inode); + path_put(&nd.path); break; } @@ -134,21 +133,16 @@ xfs_find_handle( return -XFS_ERROR(EBADF); } - /* we need the vnode */ - vp = vn_from_inode(inode); - /* now we can grab the fsid */ memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { - xfs_inode_t *ip; + xfs_inode_t *ip = XFS_I(inode); int lock_mode; /* need to get access to the xfs_inode to read the generation */ - ip = xfs_vtoi(vp); - ASSERT(ip); lock_mode = xfs_ilock_map_shared(ip); /* fill in fid section of handle from inode */ @@ -176,21 +170,19 @@ xfs_find_handle( /* - * Convert userspace handle data into vnode (and inode). - * We [ab]use the fact that all the fsop_handlereq ioctl calls - * have a data structure argument whose first component is always - * a xfs_fsop_handlereq_t, so we can cast to and from this type. - * This allows us to optimise the copy_from_user calls and gives - * a handy, shared routine. + * Convert userspace handle data into inode. 
+ * + * We use the fact that all the fsop_handlereq ioctl calls have a data + * structure argument whose first component is always a xfs_fsop_handlereq_t, + * so we can pass that sub structure into this handy, shared routine. * - * If no error, caller must always VN_RELE the returned vp. + * If no error, caller must always iput the returned inode. */ STATIC int xfs_vget_fsop_handlereq( xfs_mount_t *mp, struct inode *parinode, /* parent inode pointer */ xfs_fsop_handlereq_t *hreq, - bhv_vnode_t **vp, struct inode **inode) { void __user *hanp; @@ -199,8 +191,6 @@ xfs_vget_fsop_handlereq( xfs_handle_t *handlep; xfs_handle_t handle; xfs_inode_t *ip; - struct inode *inodep; - bhv_vnode_t *vpp; xfs_ino_t ino; __u32 igen; int error; @@ -241,7 +231,7 @@ xfs_vget_fsop_handlereq( } /* - * Get the XFS inode, building a vnode to go with it. + * Get the XFS inode, building a Linux inode to go with it. */ error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); if (error) @@ -253,12 +243,9 @@ xfs_vget_fsop_handlereq( return XFS_ERROR(ENOENT); } - vpp = XFS_ITOV(ip); - inodep = vn_to_inode(vpp); xfs_iunlock(ip, XFS_ILOCK_SHARED); - *vp = vpp; - *inode = inodep; + *inode = XFS_ITOV(ip); return 0; } @@ -275,7 +262,6 @@ xfs_open_by_handle( struct file *filp; struct inode *inode; struct dentry *dentry; - bhv_vnode_t *vp; xfs_fsop_handlereq_t hreq; if (!capable(CAP_SYS_ADMIN)) @@ -283,7 +269,7 @@ xfs_open_by_handle( if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); if (error) return -error; @@ -385,7 +371,6 @@ xfs_readlink_by_handle( { struct inode *inode; xfs_fsop_handlereq_t hreq; - bhv_vnode_t *vp; __u32 olen; void *link; int error; @@ -395,7 +380,7 @@ xfs_readlink_by_handle( if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, 
parinode, &hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); if (error) return -error; @@ -438,34 +423,32 @@ xfs_fssetdm_by_handle( struct fsdmidata fsd; xfs_fsop_setdm_handlereq_t dmhreq; struct inode *inode; - bhv_vnode_t *vp; if (!capable(CAP_MKNOD)) return -XFS_ERROR(EPERM); if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &inode); if (error) return -error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { - VN_RELE(vp); - return -XFS_ERROR(EPERM); + error = -XFS_ERROR(EPERM); + goto out; } if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - VN_RELE(vp); - return -XFS_ERROR(EFAULT); + error = -XFS_ERROR(EFAULT); + goto out; } - error = xfs_set_dmattrs(xfs_vtoi(vp), - fsd.fsd_dmevmask, fsd.fsd_dmstate); + error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); - VN_RELE(vp); - if (error) - return -error; - return 0; + out: + iput(inode); + return error; } STATIC int @@ -478,7 +461,6 @@ xfs_attrlist_by_handle( attrlist_cursor_kern_t *cursor; xfs_fsop_attrlist_handlereq_t al_hreq; struct inode *inode; - bhv_vnode_t *vp; char *kbuf; if (!capable(CAP_SYS_ADMIN)) @@ -488,8 +470,7 @@ xfs_attrlist_by_handle( if (al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); - error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, - &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); if (error) goto out; @@ -509,7 +490,7 @@ xfs_attrlist_by_handle( out_kfree: kfree(kbuf); out_vn_rele: - VN_RELE(vp); + iput(inode); out: return -error; } @@ -531,7 +512,7 @@ xfs_attrmulti_attr_get( if (!kbuf) return ENOMEM; - error = xfs_attr_get(XFS_I(inode), name, kbuf, len, flags, NULL); + error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); if (error) goto out_kfree; @@ -598,7 +579,6 @@ 
xfs_attrmulti_by_handle( xfs_attr_multiop_t *ops; xfs_fsop_attrmulti_handlereq_t am_hreq; struct inode *inode; - bhv_vnode_t *vp; unsigned int i, size; char *attr_name; @@ -607,7 +587,7 @@ xfs_attrmulti_by_handle( if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &inode); if (error) goto out; @@ -666,7 +646,7 @@ xfs_attrmulti_by_handle( out_kfree_ops: kfree(ops); out_vn_rele: - VN_RELE(vp); + iput(inode); out: return -error; } @@ -702,7 +682,6 @@ xfs_ioc_fsgeometry( STATIC int xfs_ioc_xattr( - bhv_vnode_t *vp, xfs_inode_t *ip, struct file *filp, unsigned int cmd, @@ -735,12 +714,10 @@ xfs_ioctl( void __user *arg) { struct inode *inode = filp->f_path.dentry->d_inode; - bhv_vnode_t *vp = vn_from_inode(inode); xfs_mount_t *mp = ip->i_mount; int error; - vn_trace_entry(XFS_I(inode), "xfs_ioctl", (inst_t *)__return_address); - + xfs_itrace_entry(XFS_I(inode)); switch (cmd) { case XFS_IOC_ALLOCSP: @@ -764,7 +741,7 @@ xfs_ioctl( case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(ip) ? 
mp->m_rtdev_targp : mp->m_ddev_targp; da.d_mem = da.d_miniosz = 1 << target->bt_sshift; @@ -796,7 +773,7 @@ xfs_ioctl( case XFS_IOC_GETXFLAGS: case XFS_IOC_SETXFLAGS: case XFS_IOC_FSSETXATTR: - return xfs_ioc_xattr(vp, ip, filp, cmd, arg); + return xfs_ioc_xattr(ip, filp, cmd, arg); case XFS_IOC_FSSETDM: { struct fsdmidata dmi; @@ -1203,7 +1180,6 @@ xfs_ioc_fsgetxattr( STATIC int xfs_ioc_xattr( - bhv_vnode_t *vp, xfs_inode_t *ip, struct file *filp, unsigned int cmd, @@ -1237,7 +1213,7 @@ xfs_ioc_xattr( error = xfs_setattr(ip, vattr, attr_flags, NULL); if (likely(!error)) - __vn_revalidate(vp, vattr); /* update flags */ + vn_revalidate(XFS_ITOV(ip)); /* update flags */ error = -error; break; } @@ -1272,7 +1248,7 @@ xfs_ioc_xattr( error = xfs_setattr(ip, vattr, attr_flags, NULL); if (likely(!error)) - __vn_revalidate(vp, vattr); /* update flags */ + vn_revalidate(XFS_ITOV(ip)); /* update flags */ error = -error; break; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index bf2a956b63c2..a4b254eb43b2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -44,6 +44,7 @@ #include "xfs_error.h" #include "xfs_dfrag.h" #include "xfs_vnodeops.h" +#include "xfs_ioctl32.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) @@ -379,9 +380,6 @@ xfs_compat_ioctl( switch (cmd) { case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: - case XFS_IOC_GETVERSION: - case XFS_IOC_GETXFLAGS: - case XFS_IOC_SETXFLAGS: case XFS_IOC_FSGETXATTR: case XFS_IOC_FSSETXATTR: case XFS_IOC_FSGETXATTRA: @@ -407,6 +405,11 @@ xfs_compat_ioctl( case XFS_IOC_ERROR_CLEARALL: break; + case XFS_IOC32_GETXFLAGS: + case XFS_IOC32_SETXFLAGS: + case XFS_IOC32_GETVERSION: + cmd = _NATIVE_IOC(cmd, long); + break; #ifdef BROKEN_X86_ALIGNMENT /* xfs_flock_t has wrong u32 vs u64 alignment */ case XFS_IOC_ALLOCSP_32: diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 5e8bb7f71b5a..cc4abd3daa49 
100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -52,6 +52,7 @@ #include <linux/xattr.h> #include <linux/namei.h> #include <linux/security.h> +#include <linux/falloc.h> /* * Bring the atime in the XFS inode uptodate. @@ -71,6 +72,22 @@ xfs_synchronize_atime( } /* + * If the linux inode exists, mark it dirty. + * Used when commiting a dirty inode into a transaction so that + * the inode will get written back by the linux code + */ +void +xfs_mark_inode_dirty_sync( + xfs_inode_t *ip) +{ + bhv_vnode_t *vp; + + vp = XFS_ITOV_NULL(ip); + if (vp) + mark_inode_dirty_sync(vn_to_inode(vp)); +} + +/* * Change the requested timestamp in the given inode. * We don't lock across timestamp updates, and we don't log them but * we do record the fact that there is dirty information in core. @@ -184,10 +201,6 @@ xfs_validate_fields( struct xfs_inode *ip = XFS_I(inode); loff_t size; - inode->i_nlink = ip->i_d.di_nlink; - inode->i_blocks = - XFS_FSB_TO_BB(ip->i_mount, ip->i_d.di_nblocks + - ip->i_delayed_blks); /* we're under i_sem so i_size can't change under us */ size = XFS_ISIZE(ip); if (i_size_read(inode) != size) @@ -542,12 +555,31 @@ xfs_vn_put_link( #ifdef CONFIG_XFS_POSIX_ACL STATIC int +xfs_check_acl( + struct inode *inode, + int mask) +{ + struct xfs_inode *ip = XFS_I(inode); + int error; + + xfs_itrace_entry(ip); + + if (XFS_IFORK_Q(ip)) { + error = xfs_acl_iaccess(ip, mask, NULL); + if (error != -1) + return -error; + } + + return -EAGAIN; +} + +STATIC int xfs_vn_permission( - struct inode *inode, - int mode, - struct nameidata *nd) + struct inode *inode, + int mask, + struct nameidata *nd) { - return -xfs_access(XFS_I(inode), mode << 6, NULL); + return generic_permission(inode, mask, xfs_check_acl); } #else #define xfs_vn_permission NULL @@ -555,33 +587,61 @@ xfs_vn_permission( STATIC int xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) + struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *stat) 
{ - struct inode *inode = dentry->d_inode; - bhv_vattr_t vattr = { .va_mask = XFS_AT_STAT }; - int error; - - error = xfs_getattr(XFS_I(inode), &vattr, ATTR_LAZY); - if (likely(!error)) { - stat->size = i_size_read(inode); - stat->dev = inode->i_sb->s_dev; - stat->rdev = (vattr.va_rdev == 0) ? 0 : - MKDEV(sysv_major(vattr.va_rdev) & 0x1ff, - sysv_minor(vattr.va_rdev)); - stat->mode = vattr.va_mode; - stat->nlink = vattr.va_nlink; - stat->uid = vattr.va_uid; - stat->gid = vattr.va_gid; - stat->ino = vattr.va_nodeid; - stat->atime = vattr.va_atime; - stat->mtime = vattr.va_mtime; - stat->ctime = vattr.va_ctime; - stat->blocks = vattr.va_nblocks; - stat->blksize = vattr.va_blocksize; + struct inode *inode = dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + xfs_itrace_entry(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + stat->size = XFS_ISIZE(ip); + stat->dev = inode->i_sb->s_dev; + stat->mode = ip->i_d.di_mode; + stat->nlink = ip->i_d.di_nlink; + stat->uid = ip->i_d.di_uid; + stat->gid = ip->i_d.di_gid; + stat->ino = ip->i_ino; +#if XFS_BIG_INUMS + stat->ino += mp->m_inoadd; +#endif + stat->atime = inode->i_atime; + stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; + stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec; + stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + stat->blocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + stat->blksize = BLKDEV_IOSIZE; + stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * If the file blocks are being allocated from a + * realtime volume, then return the inode's realtime + * extent size or the realtime volume's extent size. 
+ */ + stat->blksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + } else + stat->blksize = xfs_preferred_iosize(mp); + stat->rdev = 0; + break; } - return -error; + + return 0; } STATIC int @@ -636,7 +696,7 @@ xfs_vn_setattr( error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL); if (likely(!error)) - __vn_revalidate(vn_from_inode(inode), &vattr); + vn_revalidate(vn_from_inode(inode)); return -error; } @@ -750,6 +810,47 @@ xfs_vn_removexattr( return namesp->attr_remove(vp, attr, xflags); } +STATIC long +xfs_vn_fallocate( + struct inode *inode, + int mode, + loff_t offset, + loff_t len) +{ + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + + /* preallocation on directories not yet supported */ + error = -ENODEV; + if (S_ISDIR(inode->i_mode)) + goto out_error; + + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, + 0, NULL, ATTR_NOLOCK); + if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) + new_size = offset + len; + + /* Change file size if needed */ + if (new_size) { + bhv_vattr_t va; + + va.va_mask = XFS_AT_SIZE; + va.va_size = new_size; + error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL); + } + + xfs_iunlock(ip, XFS_IOLOCK_EXCL); +out_error: + return error; +} const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, @@ -760,6 +861,7 @@ const struct inode_operations xfs_inode_operations = { .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .fallocate = xfs_vn_fallocate, }; const struct inode_operations xfs_dir_inode_operations = { diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index dc3752de22da..3ca39c4e5d2a 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -43,7 +43,6 @@ #include <kmem.h> #include <mrlock.h> -#include <spin.h> #include <sv.h> 
#include <mutex.h> #include <sema.h> @@ -75,6 +74,7 @@ #include <linux/notifier.h> #include <linux/delay.h> #include <linux/log2.h> +#include <linux/spinlock.h> #include <asm/page.h> #include <asm/div64.h> @@ -136,43 +136,19 @@ #define current_restore_flags_nested(sp, f) \ (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) -#define NBPP PAGE_SIZE -#define NDPP (1 << (PAGE_SHIFT - 9)) +#define spinlock_destroy(lock) #define NBBY 8 /* number of bits per byte */ -#define NBPC PAGE_SIZE /* Number of bytes per click */ -#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ /* * Size of block device i/o is parameterized here. * Currently the system supports page-sized i/o. */ -#define BLKDEV_IOSHIFT BPCSHIFT +#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) /* number of BB's per block device block */ #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) -/* bytes to clicks */ -#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) -#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) -#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) -#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) - -/* off_t bytes to clicks */ -#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) -#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) - -/* clicks to off_t bytes */ -#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) - -/* clicks to bytes */ -#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) -#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) -#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) - -/* bytes to clicks */ -#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) - #define ENOATTR ENODATA /* Attribute not found */ #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ @@ -205,10 +181,6 @@ #define xfs_stack_trace() dump_stack() #define xfs_itruncate_data(ip, off) \ (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) -#define xfs_statvfs_fsid(statp, mp) \ - ({ u64 id = 
huge_encode_dev((mp)->m_ddev_targp->bt_dev); \ - __kernel_fsid_t *fsid = &(statp)->f_fsid; \ - (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) /* Move the kernel do_div definition off to one side */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 6f614f35f650..166353388490 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -58,14 +58,12 @@ void xfs_rw_enter_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, void *data, size_t segs, loff_t offset, int ioflags) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (ip->i_rwtrace == NULL) return; ktrace_enter(ip->i_rwtrace, @@ -78,8 +76,8 @@ xfs_rw_enter_trace( (void *)((unsigned long)((offset >> 32) & 0xffffffff)), (void *)((unsigned long)(offset & 0xffffffff)), (void *)((unsigned long)ioflags), - (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(io->io_new_size & 0xffffffff)), + (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, @@ -89,13 +87,12 @@ xfs_rw_enter_trace( void xfs_inval_cached_trace( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, xfs_off_t len, xfs_off_t first, xfs_off_t last) { - xfs_inode_t *ip = XFS_IO_INODE(io); if (ip->i_rwtrace == NULL) return; @@ -131,7 +128,7 @@ xfs_inval_cached_trace( */ STATIC int xfs_iozero( - struct inode *ip, /* inode */ + struct xfs_inode *ip, /* inode */ loff_t pos, /* offset in file */ size_t count) /* size of data to zero */ { @@ -139,7 +136,7 @@ xfs_iozero( struct address_space *mapping; int status; - mapping = ip->i_mapping; + mapping = ip->i_vnode->i_mapping; do { unsigned offset, bytes; void *fsdata; @@ -205,7 +202,7 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(ip) ? 
mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->bt_smask) || (size & target->bt_smask)) { @@ -246,9 +243,8 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) - ret = xfs_flushinval_pages(ip, - ctooff(offtoct(*offset)), - -1, FI_REMAPF_LOCKED); + ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -256,7 +252,7 @@ xfs_read( } } - xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_READ_ENTER, ip, (void *)iovp, segs, *offset, ioflags); iocb->ki_pos = *offset; @@ -301,7 +297,7 @@ xfs_splice_read( return -error; } } - xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, pipe, count, *ppos, ioflags); ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) @@ -323,7 +319,6 @@ xfs_splice_write( { bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; ssize_t ret; struct inode *inode = outfilp->f_mapping->host; xfs_fsize_t isize, new_size; @@ -350,10 +345,10 @@ xfs_splice_write( xfs_ilock(ip, XFS_ILOCK_EXCL); if (new_size > ip->i_size) - io->io_new_size = new_size; + ip->i_new_size = new_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, pipe, count, *ppos, ioflags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); if (ret > 0) @@ -370,9 +365,9 @@ xfs_splice_write( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if (io->io_new_size) { + if (ip->i_new_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); - io->io_new_size = 0; + ip->i_new_size = 0; if (ip->i_d.di_size > ip->i_size) ip->i_d.di_size = ip->i_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -389,20 +384,19 @@ xfs_splice_write( */ STATIC int /* error (positive) */ xfs_zero_last_block( - struct inode *ip, - xfs_iocore_t *io, + xfs_inode_t *ip, 
xfs_fsize_t offset, xfs_fsize_t isize) { xfs_fileoff_t last_fsb; - xfs_mount_t *mp = io->io_mount; + xfs_mount_t *mp = ip->i_mount; int nimaps; int zero_offset; int zero_len; int error = 0; xfs_bmbt_irec_t imap; - ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { @@ -415,7 +409,7 @@ xfs_zero_last_block( last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; - error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, + error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { return error; @@ -433,14 +427,14 @@ xfs_zero_last_block( * out sync. We need to drop the ilock while we do this so we * don't deadlock when the buffer cache calls back to us. */ - XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); + xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; error = xfs_iozero(ip, isize, zero_len); - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; } @@ -458,35 +452,33 @@ xfs_zero_last_block( int /* error (positive) */ xfs_zero_eof( - bhv_vnode_t *vp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, /* starting I/O offset */ xfs_fsize_t isize) /* current inode size */ { - struct inode *ip = vn_to_inode(vp); + xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_fileoff_t zero_off; xfs_fsize_t zero_len; - xfs_mount_t *mp = io->io_mount; int nimaps; int error = 0; xfs_bmbt_irec_t imap; - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); ASSERT(offset > isize); /* * First handle zeroing the block on 
which isize resides. * We only zero a part of that block so it is handled specially. */ - error = xfs_zero_last_block(ip, io, offset, isize); + error = xfs_zero_last_block(ip, offset, isize); if (error) { - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); return error; } @@ -514,11 +506,11 @@ xfs_zero_eof( while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; - error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, + error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); return error; } ASSERT(nimaps > 0); @@ -542,7 +534,7 @@ xfs_zero_eof( * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. 
*/ - XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); @@ -558,14 +550,13 @@ xfs_zero_eof( start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); } return 0; out_lock: - - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; } @@ -587,7 +578,6 @@ xfs_write( xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; - xfs_iocore_t *io; int iolock; int eventsent = 0; bhv_vrwlock_t locktype; @@ -607,8 +597,7 @@ xfs_write( if (count == 0) return 0; - io = &xip->i_iocore; - mp = io->io_mount; + mp = xip->i_mount; xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); @@ -667,7 +656,7 @@ start: if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = - (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(xip) ? 
mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) { @@ -688,7 +677,7 @@ start: new_size = pos + count; if (new_size > xip->i_size) - io->io_new_size = new_size; + xip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) { file_update_time(file); @@ -706,7 +695,7 @@ start: */ if (pos > xip->i_size) { - error = xfs_zero_eof(vp, io, pos, xip->i_size); + error = xfs_zero_eof(xip, pos, xip->i_size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL); goto out_unlock_internal; @@ -740,10 +729,10 @@ retry: if ((ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) { WARN_ON(need_i_mutex == 0); - xfs_inval_cached_trace(io, pos, -1, - ctooff(offtoct(pos)), -1); + xfs_inval_cached_trace(xip, pos, -1, + (pos & PAGE_CACHE_MASK), -1); error = xfs_flushinval_pages(xip, - ctooff(offtoct(pos)), + (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_internal; @@ -751,7 +740,7 @@ retry: if (need_i_mutex) { /* demote the lock now the cached pages are gone */ - XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); + xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; @@ -759,7 +748,7 @@ retry: need_i_mutex = 0; } - xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, + xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); @@ -779,7 +768,7 @@ retry: goto relock; } } else { - xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, + xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); @@ -843,9 +832,9 @@ retry: } out_unlock_internal: - if (io->io_new_size) { + if (xip->i_new_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); - io->io_new_size = 0; + xip->i_new_size = 0; /* * If this was a direct or synchronous I/O that failed (such * as ENOSPC) then part of the I/O may have been 
written to @@ -894,25 +883,6 @@ xfs_bdstrat_cb(struct xfs_buf *bp) } } - -int -xfs_bmap( - xfs_inode_t *ip, - xfs_off_t offset, - ssize_t count, - int flags, - xfs_iomap_t *iomapp, - int *niomaps) -{ - xfs_iocore_t *io = &ip->i_iocore; - - ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); - ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == - ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); - - return xfs_iomap(io, offset, count, flags, iomapp, niomaps); -} - /* * Wrapper around bdstrat so that we can stop data * from going to disk in case we are shutting down the filesystem. diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index 4b7747a828d9..e200253139cf 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -19,7 +19,6 @@ #define __XFS_LRW_H__ struct xfs_mount; -struct xfs_iocore; struct xfs_inode; struct xfs_bmbt_irec; struct xfs_buf; @@ -60,20 +59,19 @@ struct xfs_iomap; #define XFS_IOMAP_UNWRITTEN 27 #define XFS_SPLICE_READ_ENTER 28 #define XFS_SPLICE_WRITE_ENTER 29 -extern void xfs_rw_enter_trace(int, struct xfs_iocore *, - void *, size_t, loff_t, int); -extern void xfs_inval_cached_trace(struct xfs_iocore *, - xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); +extern void xfs_rw_enter_trace(int, struct xfs_inode *, + void *, size_t, loff_t, int); +extern void xfs_inval_cached_trace(struct xfs_inode *, + xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); #else -#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags) -#define xfs_inval_cached_trace(io, offset, len, first, last) +#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags) +#define xfs_inval_cached_trace(ip, offset, len, first, last) #endif extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); -extern int xfs_zero_eof(struct inode *, struct xfs_iocore *, xfs_off_t, - xfs_fsize_t); +extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, 
xfs_fsize_t); #endif /* __XFS_LRW_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8cb63c60c048..21dfc9da235e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -41,6 +41,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" +#include "xfs_fsops.h" #include "xfs_rw.h" #include "xfs_acl.h" #include "xfs_attr.h" @@ -49,6 +50,8 @@ #include "xfs_vnodeops.h" #include "xfs_vfsops.h" #include "xfs_version.h" +#include "xfs_log_priv.h" +#include "xfs_trans_priv.h" #include <linux/namei.h> #include <linux/init.h> @@ -87,6 +90,435 @@ xfs_args_allocate( return args; } +#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ +#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ +#define MNTOPT_LOGDEV "logdev" /* log device */ +#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ +#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ +#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ +#define MNTOPT_INO64 "ino64" /* force inodes into 64-bit range */ +#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ +#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ +#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ +#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ +#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ +#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ +#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ +#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ +#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and + * unwritten 
extent conversion */ +#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ +#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ +#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ +#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ +#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ +#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ +#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes + * in stat(). */ +#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ +#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ +#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ +#define MNTOPT_NOQUOTA "noquota" /* no quotas */ +#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ +#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ +#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ +#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ +#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ +#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ +#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ +#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ +#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ +#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ +#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ +#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ +#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ + +STATIC unsigned long +suffix_strtoul(char *s, char **endp, unsigned int base) +{ + int last, shift_left_factor = 0; + char *value = s; + + last = strlen(value) - 1; + if (value[last] == 'K' || value[last] == 'k') { + shift_left_factor = 10; + value[last] = '\0'; + } + if (value[last] == 'M' 
|| value[last] == 'm') { + shift_left_factor = 20; + value[last] = '\0'; + } + if (value[last] == 'G' || value[last] == 'g') { + shift_left_factor = 30; + value[last] = '\0'; + } + + return simple_strtoul((const char *)s, endp, base) << shift_left_factor; +} + +STATIC int +xfs_parseargs( + struct xfs_mount *mp, + char *options, + struct xfs_mount_args *args, + int update) +{ + char *this_char, *value, *eov; + int dsunit, dswidth, vol_dsunit, vol_dswidth; + int iosize; + int ikeep = 0; + + args->flags |= XFSMNT_BARRIER; + args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + + if (!options) + goto done; + + iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; + + while ((this_char = strsep(&options, ",")) != NULL) { + if (!*this_char) + continue; + if ((value = strchr(this_char, '=')) != NULL) + *value++ = 0; + + if (!strcmp(this_char, MNTOPT_LOGBUFS)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + args->logbufs = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + args->logbufsize = suffix_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + strncpy(args->logname, value, MAXNAMELEN); + } else if (!strcmp(this_char, MNTOPT_MTPT)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + strncpy(args->mtpt, value, MAXNAMELEN); + } else if (!strcmp(this_char, MNTOPT_RTDEV)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + strncpy(args->rtname, value, MAXNAMELEN); + } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { + if (!value || !*value) { + cmn_err(CE_WARN, 
+ "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + iosize = simple_strtoul(value, &eov, 10); + args->flags |= XFSMNT_IOSIZE; + args->iosizelog = (uint8_t) iosize; + } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + iosize = suffix_strtoul(value, &eov, 10); + args->flags |= XFSMNT_IOSIZE; + args->iosizelog = ffs(iosize) - 1; + } else if (!strcmp(this_char, MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + mp->m_flags |= XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + mp->m_flags &= ~XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_WSYNC)) { + args->flags |= XFSMNT_WSYNC; + } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { + args->flags |= XFSMNT_OSYNCISOSYNC; + } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { + args->flags |= XFSMNT_NORECOVERY; + } else if (!strcmp(this_char, MNTOPT_INO64)) { + args->flags |= XFSMNT_INO64; +#if !XFS_BIG_INUMS + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", + this_char); + return EINVAL; +#endif + } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { + args->flags |= XFSMNT_NOALIGN; + } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { + args->flags |= XFSMNT_SWALLOC; + } else if (!strcmp(this_char, MNTOPT_SUNIT)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + dsunit = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + dswidth = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { + args->flags &= ~XFSMNT_32BITINODES; +#if !XFS_BIG_INUMS + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", + this_char); + 
return EINVAL; +#endif + } else if (!strcmp(this_char, MNTOPT_NOUUID)) { + args->flags |= XFSMNT_NOUUID; + } else if (!strcmp(this_char, MNTOPT_BARRIER)) { + args->flags |= XFSMNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { + args->flags &= ~XFSMNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_IKEEP)) { + ikeep = 1; + args->flags &= ~XFSMNT_IDELETE; + } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { + args->flags |= XFSMNT_IDELETE; + } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { + args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { + args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_ATTR2)) { + args->flags |= XFSMNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { + args->flags &= ~XFSMNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { + args->flags2 |= XFSMNT2_FILESTREAMS; + } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { + args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); + args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); + } else if (!strcmp(this_char, MNTOPT_QUOTA) || + !strcmp(this_char, MNTOPT_UQUOTA) || + !strcmp(this_char, MNTOPT_USRQUOTA)) { + args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || + !strcmp(this_char, MNTOPT_UQUOTANOENF)) { + args->flags |= XFSMNT_UQUOTA; + args->flags &= ~XFSMNT_UQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_PQUOTA) || + !strcmp(this_char, MNTOPT_PRJQUOTA)) { + args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { + args->flags |= XFSMNT_PQUOTA; + args->flags &= ~XFSMNT_PQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_GQUOTA) || + !strcmp(this_char, MNTOPT_GRPQUOTA)) { + args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { + args->flags |= XFSMNT_GQUOTA; + args->flags &= ~XFSMNT_GQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_DMAPI)) { + args->flags |= 
XFSMNT_DMAPI; + } else if (!strcmp(this_char, MNTOPT_XDSM)) { + args->flags |= XFSMNT_DMAPI; + } else if (!strcmp(this_char, MNTOPT_DMI)) { + args->flags |= XFSMNT_DMAPI; + } else if (!strcmp(this_char, "ihashsize")) { + cmn_err(CE_WARN, + "XFS: ihashsize no longer used, option is deprecated."); + } else if (!strcmp(this_char, "osyncisdsync")) { + /* no-op, this is now the default */ + cmn_err(CE_WARN, + "XFS: osyncisdsync is now the default, option is deprecated."); + } else if (!strcmp(this_char, "irixsgid")) { + cmn_err(CE_WARN, + "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); + } else { + cmn_err(CE_WARN, + "XFS: unknown mount option [%s].", this_char); + return EINVAL; + } + } + + if (args->flags & XFSMNT_NORECOVERY) { + if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { + cmn_err(CE_WARN, + "XFS: no-recovery mounts must be read-only."); + return EINVAL; + } + } + + if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { + cmn_err(CE_WARN, + "XFS: sunit and swidth options incompatible with the noalign option"); + return EINVAL; + } + + if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + cmn_err(CE_WARN, + "XFS: cannot mount with both project and group quota"); + return EINVAL; + } + + if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { + printk("XFS: %s option needs the mount point option as well\n", + MNTOPT_DMAPI); + return EINVAL; + } + + if ((dsunit && !dswidth) || (!dsunit && dswidth)) { + cmn_err(CE_WARN, + "XFS: sunit and swidth must be specified together"); + return EINVAL; + } + + if (dsunit && (dswidth % dsunit != 0)) { + cmn_err(CE_WARN, + "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", + dswidth, dsunit); + return EINVAL; + } + + /* + * Applications using DMI filesystems often expect the + * inode generation number to be monotonically increasing. 
+ * If we delete inode chunks we break this assumption, so + * keep unused inode chunks on disk for DMI filesystems + * until we come up with a better solution. + * Note that if "ikeep" or "noikeep" mount options are + * supplied, then they are honored. + */ + if (!(args->flags & XFSMNT_DMAPI) && !ikeep) + args->flags |= XFSMNT_IDELETE; + + if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { + if (dsunit) { + args->sunit = dsunit; + args->flags |= XFSMNT_RETERR; + } else { + args->sunit = vol_dsunit; + } + dswidth ? (args->swidth = dswidth) : + (args->swidth = vol_dswidth); + } else { + args->sunit = args->swidth = 0; + } + +done: + if (args->flags & XFSMNT_32BITINODES) + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + if (args->flags2) + args->flags |= XFSMNT_FLAGS2; + return 0; +} + +struct proc_xfs_info { + int flag; + char *str; +}; + +STATIC int +xfs_showargs( + struct xfs_mount *mp, + struct seq_file *m) +{ + static struct proc_xfs_info xfs_info_set[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, + { XFS_MOUNT_INO64, "," MNTOPT_INO64 }, + { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, + { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, + { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, + { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, + { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, + { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, + { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, + { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, + { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, + { 0, NULL } + }; + static struct proc_xfs_info xfs_info_unset[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_IDELETE, "," MNTOPT_IKEEP }, + { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, + { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, + { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, + { 0, NULL } + }; + struct proc_xfs_info *xfs_infop; + + for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { + if (mp->m_flags & 
xfs_infop->flag) + seq_puts(m, xfs_infop->str); + } + for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { + if (!(mp->m_flags & xfs_infop->flag)) + seq_puts(m, xfs_infop->str); + } + + if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", + (int)(1 << mp->m_writeio_log) >> 10); + + if (mp->m_logbufs > 0) + seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); + if (mp->m_logbsize > 0) + seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); + + if (mp->m_logname) + seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); + if (mp->m_rtname) + seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); + + if (mp->m_dalign > 0) + seq_printf(m, "," MNTOPT_SUNIT "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); + if (mp->m_swidth > 0) + seq_printf(m, "," MNTOPT_SWIDTH "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); + + if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_USRQUOTA); + else if (mp->m_qflags & XFS_UQUOTA_ACCT) + seq_puts(m, "," MNTOPT_UQUOTANOENF); + + if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_PRJQUOTA); + else if (mp->m_qflags & XFS_PQUOTA_ACCT) + seq_puts(m, "," MNTOPT_PQUOTANOENF); + + if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_GRPQUOTA); + else if (mp->m_qflags & XFS_GQUOTA_ACCT) + seq_puts(m, "," MNTOPT_GQUOTANOENF); + + if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) + seq_puts(m, "," MNTOPT_NOQUOTA); + + return 0; +} __uint64_t xfs_max_file_offset( unsigned int blockshift) @@ -137,7 +569,7 @@ xfs_set_inodeops( break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; - if (inode->i_blocks) + if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE)) inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: @@ -174,8 +606,6 @@ xfs_revalidate_inode( inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); - inode->i_blocks = - XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + 
ip->i_delayed_blks); inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; @@ -334,6 +764,64 @@ xfs_blkdev_issue_flush( blkdev_issue_flush(buftarg->bt_bdev, NULL); } +/* + * XFS AIL push thread support + */ +void +xfsaild_wakeup( + xfs_mount_t *mp, + xfs_lsn_t threshold_lsn) +{ + mp->m_ail.xa_target = threshold_lsn; + wake_up_process(mp->m_ail.xa_task); +} + +int +xfsaild( + void *data) +{ + xfs_mount_t *mp = (xfs_mount_t *)data; + xfs_lsn_t last_pushed_lsn = 0; + long tout = 0; + + while (!kthread_should_stop()) { + if (tout) + schedule_timeout_interruptible(msecs_to_jiffies(tout)); + tout = 1000; + + /* swsusp */ + try_to_freeze(); + + ASSERT(mp->m_log); + if (XFS_FORCED_SHUTDOWN(mp)) + continue; + + tout = xfsaild_push(mp, &last_pushed_lsn); + } + + return 0; +} /* xfsaild */ + +int +xfsaild_start( + xfs_mount_t *mp) +{ + mp->m_ail.xa_target = 0; + mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); + if (IS_ERR(mp->m_ail.xa_task)) + return -PTR_ERR(mp->m_ail.xa_task); + return 0; +} + +void +xfsaild_stop( + xfs_mount_t *mp) +{ + kthread_stop(mp->m_ail.xa_task); +} + + + STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) @@ -361,7 +849,7 @@ xfs_fs_inode_init_once( inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); } -STATIC int +STATIC int __init xfs_init_zones(void) { xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", @@ -410,8 +898,7 @@ xfs_fs_write_inode( { int error = 0, flags = FLUSH_INODE; - vn_trace_entry(XFS_I(inode), __FUNCTION__, - (inst_t *)__return_address); + xfs_itrace_entry(XFS_I(inode)); if (sync) { filemap_fdatawait(inode->i_mapping); flags |= FLUSH_SYNC; @@ -438,8 +925,7 @@ xfs_fs_clear_inode( * find an inode with di_mode == 0 but without IGET_CREATE set. 
*/ if (ip) { - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - + xfs_itrace_entry(ip); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_INC(vn_reclaim); @@ -683,8 +1169,44 @@ xfs_fs_statfs( struct dentry *dentry, struct kstatfs *statp) { - return -xfs_statvfs(XFS_M(dentry->d_sb), statp, - vn_from_inode(dentry->d_inode)); + struct xfs_mount *mp = XFS_M(dentry->d_sb); + xfs_sb_t *sbp = &mp->m_sb; + __uint64_t fakeinos, id; + xfs_extlen_t lsize; + + statp->f_type = XFS_SB_MAGIC; + statp->f_namelen = MAXNAMELEN - 1; + + id = huge_encode_dev(mp->m_ddev_targp->bt_dev); + statp->f_fsid.val[0] = (u32)id; + statp->f_fsid.val[1] = (u32)(id >> 32); + + xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); + + spin_lock(&mp->m_sb_lock); + statp->f_bsize = sbp->sb_blocksize; + lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; + statp->f_blocks = sbp->sb_dblocks - lsize; + statp->f_bfree = statp->f_bavail = + sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + fakeinos = statp->f_bfree << sbp->sb_inopblog; +#if XFS_BIG_INUMS + fakeinos += mp->m_inoadd; +#endif + statp->f_files = + MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); + if (mp->m_maxicount) +#if XFS_BIG_INUMS + if (!mp->m_inoadd) +#endif + statp->f_files = min_t(typeof(statp->f_files), + statp->f_files, + mp->m_maxicount); + statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + spin_unlock(&mp->m_sb_lock); + + XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); + return 0; } STATIC int @@ -704,11 +1226,19 @@ xfs_fs_remount( return -error; } +/* + * Second stage of a freeze. The data is already frozen so we only + * need to take care of the metadata. Once that's done write a dummy + * record to dirty the log in case of a crash while frozen. 
+ */ STATIC void xfs_fs_lockfs( struct super_block *sb) { - xfs_freeze(XFS_M(sb)); + struct xfs_mount *mp = XFS_M(sb); + + xfs_attr_quiesce(mp); + xfs_fs_log_dummy(mp); } STATIC int @@ -779,7 +1309,6 @@ xfs_fs_fill_super( struct inode *rootvp; struct xfs_mount *mp = NULL; struct xfs_mount_args *args = xfs_args_allocate(sb, silent); - struct kstatfs statvfs; int error; mp = xfs_mount_init(); @@ -807,21 +1336,19 @@ xfs_fs_fill_super( if (error) goto fail_vfsop; - error = xfs_statvfs(mp, &statvfs, NULL); - if (error) - goto fail_unmount; - sb->s_dirt = 1; - sb->s_magic = statvfs.f_type; - sb->s_blocksize = statvfs.f_bsize; - sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1; + sb->s_magic = XFS_SB_MAGIC; + sb->s_blocksize = mp->m_sb.sb_blocksize; + sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); sb->s_time_gran = 1; set_posix_acl_flag(sb); - error = xfs_root(mp, &rootvp); - if (error) + rootvp = igrab(mp->m_rootip->i_vnode); + if (!rootvp) { + error = ENOENT; goto fail_unmount; + } sb->s_root = d_alloc_root(vn_to_inode(rootvp)); if (!sb->s_root) { @@ -841,8 +1368,7 @@ xfs_fs_fill_super( goto fail_vnrele; } - vn_trace_exit(XFS_I(sb->s_root->d_inode), __FUNCTION__, - (inst_t *)__return_address); + xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); kmem_free(args, sizeof(*args)); return 0; diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 814169fd7e1e..bc7afe007338 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -40,7 +40,7 @@ #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) static wait_queue_head_t vsync[NVSYNC]; -void +void __init vn_init(void) { int i; @@ -82,84 +82,55 @@ vn_ioerror( xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); } -bhv_vnode_t * -vn_initialize( - struct inode *inode) -{ - bhv_vnode_t *vp = vn_from_inode(inode); - - XFS_STATS_INC(vn_active); - XFS_STATS_INC(vn_alloc); - - ASSERT(VN_CACHED(vp) == 0); - - return vp; 
-} - /* - * Revalidate the Linux inode from the vattr. + * Revalidate the Linux inode from the XFS inode. * Note: i_size _not_ updated; we must hold the inode * semaphore when doing that - callers responsibility. */ -void -vn_revalidate_core( - bhv_vnode_t *vp, - bhv_vattr_t *vap) +int +vn_revalidate( + bhv_vnode_t *vp) { - struct inode *inode = vn_to_inode(vp); - - inode->i_mode = vap->va_mode; - inode->i_nlink = vap->va_nlink; - inode->i_uid = vap->va_uid; - inode->i_gid = vap->va_gid; - inode->i_blocks = vap->va_nblocks; - inode->i_mtime = vap->va_mtime; - inode->i_ctime = vap->va_ctime; - if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) + struct inode *inode = vn_to_inode(vp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + unsigned long xflags; + + xfs_itrace_entry(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + inode->i_mode = ip->i_d.di_mode; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + + xflags = xfs_ip2xflags(ip); + if (xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; - if (vap->va_xflags & XFS_XFLAG_APPEND) + if (xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else inode->i_flags &= ~S_APPEND; - if (vap->va_xflags & XFS_XFLAG_SYNC) + if (xflags & XFS_XFLAG_SYNC) inode->i_flags |= S_SYNC; else inode->i_flags &= ~S_SYNC; - if (vap->va_xflags & XFS_XFLAG_NOATIME) + if (xflags & XFS_XFLAG_NOATIME) inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; -} - -/* - * Revalidate the Linux inode from the vnode. 
- */ -int -__vn_revalidate( - bhv_vnode_t *vp, - bhv_vattr_t *vattr) -{ - int error; - - vn_trace_entry(xfs_vtoi(vp), __FUNCTION__, (inst_t *)__return_address); - vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS; - error = xfs_getattr(xfs_vtoi(vp), vattr, 0); - if (likely(!error)) { - vn_revalidate_core(vp, vattr); - xfs_iflags_clear(xfs_vtoi(vp), XFS_IMODIFIED); - } - return -error; -} - -int -vn_revalidate( - bhv_vnode_t *vp) -{ - bhv_vattr_t vattr; + xfs_iunlock(ip, XFS_ILOCK_SHARED); - return __vn_revalidate(vp, &vattr); + xfs_iflags_clear(ip, XFS_IMODIFIED); + return 0; } /* @@ -179,7 +150,7 @@ vn_hold( return vp; } -#ifdef XFS_VNODE_TRACE +#ifdef XFS_INODE_TRACE /* * Reference count of Linux inode if present, -1 if the xfs_inode @@ -211,32 +182,32 @@ static inline int xfs_icount(struct xfs_inode *ip) * Vnode tracing code. */ void -vn_trace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) +_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_ENTRY, func, 0, ra); + KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); } void -vn_trace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) +_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_EXIT, func, 0, ra); + KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); } void -vn_trace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) +xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_HOLD, file, line, ra); + KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); } void -vn_trace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) +_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_REF, file, line, ra); + KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); } void -vn_trace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) +xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) { - KTRACE_ENTER(ip, 
VNODE_KTRACE_RELE, file, line, ra); + KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); } -#endif /* XFS_VNODE_TRACE */ +#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 55fb46948589..b5ea418693b1 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -187,10 +187,7 @@ typedef struct bhv_vattr { (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); -extern bhv_vnode_t *vn_initialize(struct inode *); extern int vn_revalidate(bhv_vnode_t *); -extern int __vn_revalidate(bhv_vnode_t *, bhv_vattr_t *); -extern void vn_revalidate_core(bhv_vnode_t *, bhv_vattr_t *); /* * Yeah, these don't take vnode anymore at all, all this should be @@ -210,12 +207,12 @@ static inline int vn_count(bhv_vnode_t *vp) */ extern bhv_vnode_t *vn_hold(bhv_vnode_t *); -#if defined(XFS_VNODE_TRACE) +#if defined(XFS_INODE_TRACE) #define VN_HOLD(vp) \ ((void)vn_hold(vp), \ - vn_trace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) + xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) #define VN_RELE(vp) \ - (vn_trace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ + (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ iput(vn_to_inode(vp))) #else #define VN_HOLD(vp) ((void)vn_hold(vp)) @@ -238,11 +235,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) /* * Dealing with bad inodes */ -static inline void vn_mark_bad(bhv_vnode_t *vp) -{ - make_bad_inode(vn_to_inode(vp)); -} - static inline int VN_BAD(bhv_vnode_t *vp) { return is_bad_inode(vn_to_inode(vp)); @@ -296,26 +288,36 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) /* * Tracking vnode activity. 
*/ -#if defined(XFS_VNODE_TRACE) - -#define VNODE_TRACE_SIZE 16 /* number of trace entries */ -#define VNODE_KTRACE_ENTRY 1 -#define VNODE_KTRACE_EXIT 2 -#define VNODE_KTRACE_HOLD 3 -#define VNODE_KTRACE_REF 4 -#define VNODE_KTRACE_RELE 5 - -extern void vn_trace_entry(struct xfs_inode *, const char *, inst_t *); -extern void vn_trace_exit(struct xfs_inode *, const char *, inst_t *); -extern void vn_trace_hold(struct xfs_inode *, char *, int, inst_t *); -extern void vn_trace_ref(struct xfs_inode *, char *, int, inst_t *); -extern void vn_trace_rele(struct xfs_inode *, char *, int, inst_t *); +#if defined(XFS_INODE_TRACE) + +#define INODE_TRACE_SIZE 16 /* number of trace entries */ +#define INODE_KTRACE_ENTRY 1 +#define INODE_KTRACE_EXIT 2 +#define INODE_KTRACE_HOLD 3 +#define INODE_KTRACE_REF 4 +#define INODE_KTRACE_RELE 5 + +extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); +extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); +extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); +extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); +extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); +#define xfs_itrace_entry(ip) \ + _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address) +#define xfs_itrace_exit(ip) \ + _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address) +#define xfs_itrace_exit_tag(ip, tag) \ + _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) +#define xfs_itrace_ref(ip) \ + _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) + #else -#define vn_trace_entry(a,b,c) -#define vn_trace_exit(a,b,c) -#define vn_trace_hold(a,b,c,d) -#define vn_trace_ref(a,b,c,d) -#define vn_trace_rele(a,b,c,d) +#define xfs_itrace_entry(a) +#define xfs_itrace_exit(a) +#define xfs_itrace_exit_tag(a, b) +#define xfs_itrace_hold(a, b, c, d) +#define xfs_itrace_ref(a) +#define xfs_itrace_rele(a, b, c, d) #endif #endif /* __XFS_VNODE_H__ */ diff --git 
a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index cfdd35ee9f7a..665babcca6a6 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1209,7 +1209,6 @@ xfs_qm_dqflush( xfs_buf_t *bp; xfs_disk_dquot_t *ddqp; int error; - SPLDECL(s); ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); @@ -1270,9 +1269,9 @@ xfs_qm_dqflush( mp = dqp->q_mount; /* lsn is 64 bits */ - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); /* * Attach an iodone routine so that we can remove this dquot from the @@ -1318,7 +1317,6 @@ xfs_qm_dqflush_done( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; - SPLDECL(s); dqp = qip->qli_dquot; @@ -1333,15 +1331,15 @@ xfs_qm_dqflush_done( if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && qip->qli_item.li_lsn == qip->qli_flush_lsn) { - AIL_LOCK(dqp->q_mount, s); + spin_lock(&dqp->q_mount->m_ail_lock); /* * xfs_trans_delete_ail() drops the AIL lock. 
*/ if (qip->qli_item.li_lsn == qip->qli_flush_lsn) xfs_trans_delete_ail(dqp->q_mount, - (xfs_log_item_t*)qip, s); + (xfs_log_item_t*)qip); else - AIL_UNLOCK(dqp->q_mount, s); + spin_unlock(&dqp->q_mount->m_ail_lock); } /* diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 78d3ab95c5fd..5c371a92e3e2 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -123,11 +123,6 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) vsema(&((dqp)->q_flock)); \ (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } -#define XFS_DQ_PINLOCK(dqp) mutex_spinlock( \ - &(XFS_DQ_TO_QINF(dqp)->qi_pinlock)) -#define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \ - &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s) - #define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock))) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index ddb61fe22a5c..1800e8d1f646 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -94,14 +94,13 @@ STATIC void xfs_qm_dquot_logitem_pin( xfs_dq_logitem_t *logitem) { - unsigned long s; xfs_dquot_t *dqp; dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - s = XFS_DQ_PINLOCK(dqp); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); dqp->q_pincount++; - XFS_DQ_PINUNLOCK(dqp, s); + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); } /* @@ -115,17 +114,16 @@ xfs_qm_dquot_logitem_unpin( xfs_dq_logitem_t *logitem, int stale) { - unsigned long s; xfs_dquot_t *dqp; dqp = logitem->qli_dquot; ASSERT(dqp->q_pincount > 0); - s = XFS_DQ_PINLOCK(dqp); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); dqp->q_pincount--; if (dqp->q_pincount == 0) { sv_broadcast(&dqp->q_pinwait); } - XFS_DQ_PINUNLOCK(dqp, s); + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); } /* ARGSUSED */ @@ -189,8 +187,6 @@ void xfs_qm_dqunpin_wait( xfs_dquot_t *dqp) { - SPLDECL(s); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); if 
(dqp->q_pincount == 0) { return; @@ -200,9 +196,9 @@ xfs_qm_dqunpin_wait( * Give the log a push so we don't wait here too long. */ xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); - s = XFS_DQ_PINLOCK(dqp); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); if (dqp->q_pincount == 0) { - XFS_DQ_PINUNLOCK(dqp, s); + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); return; } sv_wait(&(dqp->q_pinwait), PINOD, @@ -216,8 +212,8 @@ xfs_qm_dqunpin_wait( * If so, we want to push it out to help us take this item off the AIL as soon * as possible. * - * We must not be holding the AIL_LOCK at this point. Calling incore() to - * search the buffer cache can be a time consuming thing, and AIL_LOCK is a + * We must not be holding the AIL lock at this point. Calling incore() to + * search the buffer cache can be a time consuming thing, and AIL lock is a * spinlock. */ STATIC void @@ -322,7 +318,7 @@ xfs_qm_dquot_logitem_trylock( * want to do that now since we might sleep in the device * strategy routine. We also don't want to grab the buffer lock * here because we'd like not to call into the buffer cache - * while holding the AIL_LOCK. + * while holding the AIL lock. * Make sure to only return PUSHBUF if we set pushbuf_flag * ourselves. If someone else is doing it then we don't * want to go to the push routine and duplicate their efforts. @@ -562,15 +558,14 @@ xfs_qm_qoffend_logitem_committed( xfs_lsn_t lsn) { xfs_qoff_logitem_t *qfs; - SPLDECL(s); qfs = qfe->qql_start_lip; - AIL_LOCK(qfs->qql_item.li_mountp,s); + spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); /* * Delete the qoff-start logitem from the AIL. * xfs_trans_delete_ail() drops the AIL lock. 
*/ - xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s); + xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); kmem_free(qfs, sizeof(xfs_qoff_logitem_t)); kmem_free(qfe, sizeof(xfs_qoff_logitem_t)); return (xfs_lsn_t)-1; diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index d488645f833d..1f3da5b8657b 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -310,7 +310,6 @@ xfs_qm_mount_quotas( xfs_mount_t *mp, int mfsi_flags) { - unsigned long s; int error = 0; uint sbf; @@ -367,13 +366,13 @@ xfs_qm_mount_quotas( write_changes: /* - * We actually don't have to acquire the SB_LOCK at all. + * We actually don't have to acquire the m_sb_lock at all. * This can only be called from mount, and that's single threaded. XXX */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); sbf = mp->m_sb.sb_qflags; mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { @@ -1139,7 +1138,7 @@ xfs_qm_init_quotainfo( return error; } - spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin"); + spin_lock_init(&qinf->qi_pinlock); xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); qinf->qi_dqreclaims = 0; @@ -1370,7 +1369,6 @@ xfs_qm_qino_alloc( { xfs_trans_t *tp; int error; - unsigned long s; int committed; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); @@ -1402,7 +1400,7 @@ xfs_qm_qino_alloc( * sbfields arg may contain fields other than *QUOTINO; * VERSIONNUM for example. 
*/ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) unsigned oldv = mp->m_sb.sb_versionnum; @@ -1429,7 +1427,7 @@ xfs_qm_qino_alloc( mp->m_sb.sb_uquotino = (*ip)->i_ino; else mp->m_sb.sb_gquotino = (*ip)->i_ino; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, sbfields); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { @@ -1650,14 +1648,14 @@ xfs_qm_quotacheck_dqadjust( * Adjust the inode count and the block count to reflect this inode's * resource usage. */ - be64_add(&dqp->q_core.d_icount, 1); + be64_add_cpu(&dqp->q_core.d_icount, 1); dqp->q_res_icount++; if (nblks) { - be64_add(&dqp->q_core.d_bcount, nblks); + be64_add_cpu(&dqp->q_core.d_bcount, nblks); dqp->q_res_bcount += nblks; } if (rtblks) { - be64_add(&dqp->q_core.d_rtbcount, rtblks); + be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); dqp->q_res_rtbcount += rtblks; } diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 23ccaa5fceaf..baf537c1c177 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -52,8 +52,8 @@ extern kmem_zone_t *qm_dqtrxzone; /* * Dquot hashtable constants/threshold values. */ -#define XFS_QM_HASHSIZE_LOW (NBPP / sizeof(xfs_dqhash_t)) -#define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) /* * This defines the unit of allocation of dquots. 
@@ -106,7 +106,7 @@ typedef struct xfs_qm { typedef struct xfs_quotainfo { xfs_inode_t *qi_uquotaip; /* user quota inode */ xfs_inode_t *qi_gquotaip; /* group quota inode */ - lock_t qi_pinlock; /* dquot pinning mutex */ + spinlock_t qi_pinlock; /* dquot pinning lock */ xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ int qi_dqreclaims; /* a change here indicates a removal in the dqlist */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index ad5579d4eac4..2cc5886cfe85 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -200,7 +200,6 @@ xfs_qm_scall_quotaoff( boolean_t force) { uint dqtype; - unsigned long s; int error; uint inactivate_flags; xfs_qoff_logitem_t *qoffstart; @@ -237,9 +236,9 @@ xfs_qm_scall_quotaoff( if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { mp->m_qflags &= ~(flags); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = mp->m_qflags; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); /* XXX what to do if error ? Revert back to old vals incore ? */ @@ -415,7 +414,6 @@ xfs_qm_scall_quotaon( uint flags) { int error; - unsigned long s; uint qf; uint accflags; __int64_t sbflags; @@ -468,10 +466,10 @@ xfs_qm_scall_quotaon( * Change sb_qflags on disk but not incore mp->qflags * if this is the root filesystem. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); qf = mp->m_sb.sb_qflags; mp->m_sb.sb_qflags = qf | flags; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* * There's nothing to change if it's the same. 
@@ -815,7 +813,6 @@ xfs_qm_log_quotaoff( { xfs_trans_t *tp; int error; - unsigned long s; xfs_qoff_logitem_t *qoffi=NULL; uint oldsbqflag=0; @@ -832,10 +829,10 @@ xfs_qm_log_quotaoff( qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); xfs_trans_log_quotaoff_item(tp, qoffi); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); oldsbqflag = mp->m_sb.sb_qflags; mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, XFS_SB_QFLAGS); @@ -854,9 +851,9 @@ error0: * No one else is modifying sb_qflags, so this is OK. * We still hold the quotaofflock. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = oldsbqflag; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } *qoffstartp = qoffi; return (error); diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 7de6874bf1b8..f441f836ca8b 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -421,13 +421,13 @@ xfs_trans_apply_dquot_deltas( (xfs_qcnt_t) -qtrx->qt_icount_delta); #endif if (totalbdelta) - be64_add(&d->d_bcount, (xfs_qcnt_t)totalbdelta); + be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); if (qtrx->qt_icount_delta) - be64_add(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); + be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); if (totalrtbdelta) - be64_add(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); + be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); /* * Get any default limits in use. 
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index f45a49ffd3a3..c27abef7b84f 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -17,7 +17,6 @@ */ #include <xfs.h> #include "debug.h" -#include "spin.h" static char message[1024]; /* keep it off the stack */ static DEFINE_SPINLOCK(xfs_err_lock); @@ -81,3 +80,9 @@ assfail(char *expr, char *file, int line) printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); BUG(); } + +void +xfs_hex_dump(void *p, int length) +{ + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); +} diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 5cf2e86caa71..129067cfcb86 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -21,7 +21,7 @@ static kmem_zone_t *ktrace_hdr_zone; static kmem_zone_t *ktrace_ent_zone; static int ktrace_zentries; -void +void __init ktrace_init(int zentries) { ktrace_zentries = zentries; @@ -36,7 +36,7 @@ ktrace_init(int zentries) ASSERT(ktrace_ent_zone); } -void +void __exit ktrace_uninit(void) { kmem_zone_destroy(ktrace_hdr_zone); @@ -90,8 +90,6 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) return NULL; } - spinlock_init(&(ktp->kt_lock), "kt_lock"); - ktp->kt_entries = ktep; ktp->kt_nentries = nentries; ktp->kt_index = 0; @@ -114,8 +112,6 @@ ktrace_free(ktrace_t *ktp) if (ktp == (ktrace_t *)NULL) return; - spinlock_destroy(&ktp->kt_lock); - /* * Special treatment for the Vnode trace buffer. */ diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h index 0d73216287c0..56e72b40a859 100644 --- a/fs/xfs/support/ktrace.h +++ b/fs/xfs/support/ktrace.h @@ -18,8 +18,6 @@ #ifndef __XFS_SUPPORT_KTRACE_H__ #define __XFS_SUPPORT_KTRACE_H__ -#include <spin.h> - /* * Trace buffer entry structure. */ @@ -31,7 +29,6 @@ typedef struct ktrace_entry { * Trace buffer header structure. 
*/ typedef struct ktrace { - lock_t kt_lock; /* mutex to guard counters */ int kt_nentries; /* number of entries in trace buf */ int kt_index; /* current index in entries */ int kt_rollover; diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c index e157015c70ff..493a6ecf8590 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/support/uuid.c @@ -133,7 +133,7 @@ uuid_table_remove(uuid_t *uuid) mutex_unlock(&uuid_monitor); } -void +void __init uuid_init(void) { mutex_init(&uuid_monitor); diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index b5a7d92c6843..540e4c989825 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -37,7 +37,7 @@ #define XFS_LOG_TRACE 1 #define XFS_RW_TRACE 1 #define XFS_BUF_TRACE 1 -#define XFS_VNODE_TRACE 1 +#define XFS_INODE_TRACE 1 #define XFS_FILESTREAMS_TRACE 1 #endif diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 5bfb66f33caf..7272fe39a92d 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -392,32 +392,6 @@ xfs_acl_allow_set( } /* - * The access control process to determine the access permission: - * if uid == file owner id, use the file owner bits. - * if gid == file owner group id, use the file group bits. - * scan ACL for a matching user or group, and use matched entry - * permission. Use total permissions of all matching group entries, - * until all acl entries are exhausted. The final permission produced - * by matching acl entry or entries needs to be & with group permission. - * if not owner, owning group, or matching entry in ACL, use file - * other bits. - */ -STATIC int -xfs_acl_capability_check( - mode_t mode, - cred_t *cr) -{ - if ((mode & ACL_READ) && !capable_cred(cr, CAP_DAC_READ_SEARCH)) - return EACCES; - if ((mode & ACL_WRITE) && !capable_cred(cr, CAP_DAC_OVERRIDE)) - return EACCES; - if ((mode & ACL_EXECUTE) && !capable_cred(cr, CAP_DAC_OVERRIDE)) - return EACCES; - - return 0; -} - -/* * Note: cr is only used here for the capability check if the ACL test fails. 
* It is not used to find out the credentials uid or groups etc, as was * done in IRIX. It is assumed that the uid and groups for the current @@ -438,7 +412,6 @@ xfs_acl_access( matched.ae_tag = 0; /* Invalid type */ matched.ae_perm = 0; - md >>= 6; /* Normalize the bits for comparison */ for (i = 0; i < fap->acl_cnt; i++) { /* @@ -520,7 +493,8 @@ xfs_acl_access( break; } - return xfs_acl_capability_check(md, cr); + /* EACCES tells generic_permission to check for capability overrides */ + return EACCES; } /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 34b7d3391299..332a772461c4 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -75,7 +75,6 @@ extern int xfs_acl_vremove(bhv_vnode_t *, int); #define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0) #define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access #define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default -#define _ACL_XFS_IACCESS(i,m,c) (XFS_IFORK_Q(i) ? xfs_acl_iaccess(i,m,c) : -1) #define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP)) #define _ACL_FREE(a) ((a)? 
kmem_zone_free(xfs_acl_zone, (a)):(void)0) @@ -95,7 +94,6 @@ extern int xfs_acl_vremove(bhv_vnode_t *, int); #define _ACL_GET_DEFAULT(pv,pd) (0) #define _ACL_ACCESS_EXISTS (NULL) #define _ACL_DEFAULT_EXISTS (NULL) -#define _ACL_XFS_IACCESS(i,m,c) (-1) #endif #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 9381b0360c4b..61b292a9fb41 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -193,7 +193,7 @@ typedef struct xfs_perag xfs_agino_t pagi_count; /* number of allocated inodes */ int pagb_count; /* pagb slots in use */ #ifdef __KERNEL__ - lock_t pagb_lock; /* lock for pagb_list */ + spinlock_t pagb_lock; /* lock for pagb_list */ #endif xfs_perag_busy_t *pagb_list; /* unstable blocks */ atomic_t pagf_fstrms; /* # of filestreams active in this AG */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 012a649a19c3..bdbfbbee4959 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -592,7 +592,7 @@ xfs_alloc_ag_vextent( if (!(args->wasfromfl)) { agf = XFS_BUF_TO_AGF(args->agbp); - be32_add(&agf->agf_freeblks, -(args->len)); + be32_add_cpu(&agf->agf_freeblks, -(args->len)); xfs_trans_agblocks_delta(args->tp, -((long)(args->len))); args->pag->pagf_freeblks -= args->len; @@ -1720,7 +1720,7 @@ xfs_free_ag_extent( agf = XFS_BUF_TO_AGF(agbp); pag = &mp->m_perag[agno]; - be32_add(&agf->agf_freeblks, len); + be32_add_cpu(&agf->agf_freeblks, len); xfs_trans_agblocks_delta(tp, len); pag->pagf_freeblks += len; XFS_WANT_CORRUPTED_GOTO( @@ -2008,18 +2008,18 @@ xfs_alloc_get_freelist( * Get the block number and update the data structures. 
*/ bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); - be32_add(&agf->agf_flfirst, 1); + be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) agf->agf_flfirst = 0; pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; - be32_add(&agf->agf_flcount, -1); + be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; if (btreeblk) { - be32_add(&agf->agf_btreeblks, 1); + be32_add_cpu(&agf->agf_btreeblks, 1); pag->pagf_btreeblks++; logflags |= XFS_AGF_BTREEBLKS; } @@ -2117,17 +2117,17 @@ xfs_alloc_put_freelist( be32_to_cpu(agf->agf_seqno), &agflbp))) return error; agfl = XFS_BUF_TO_AGFL(agflbp); - be32_add(&agf->agf_fllast, 1); + be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) agf->agf_fllast = 0; pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; - be32_add(&agf->agf_flcount, 1); + be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; if (btreeblk) { - be32_add(&agf->agf_btreeblks, -1); + be32_add_cpu(&agf->agf_btreeblks, -1); pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } @@ -2206,7 +2206,7 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); pag->pagf_levels[XFS_BTNUM_CNTi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); - spinlock_init(&pag->pagb_lock, "xfspagb"); + spin_lock_init(&pag->pagb_lock); pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * sizeof(xfs_perag_busy_t), KM_SLEEP); pag->pagf_init = 1; @@ -2500,10 +2500,9 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_mount_t *mp; xfs_perag_busy_t *bsy; int n; - SPLDECL(s); mp = tp->t_mountp; - s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + spin_lock(&mp->m_perag[agno].pagb_lock); /* search pagb_list for an open slot */ for (bsy = mp->m_perag[agno].pagb_list, n = 0; @@ -2533,7 +2532,7 @@ 
xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_trans_set_sync(tp); } - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); } void @@ -2543,11 +2542,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, { xfs_mount_t *mp; xfs_perag_busy_t *list; - SPLDECL(s); mp = tp->t_mountp; - s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + spin_lock(&mp->m_perag[agno].pagb_lock); list = mp->m_perag[agno].pagb_list; ASSERT(idx < XFS_PAGB_NUM_SLOTS); @@ -2559,7 +2557,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp); } - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); } @@ -2578,11 +2576,10 @@ xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agblock_t uend, bend; xfs_lsn_t lsn; int cnt; - SPLDECL(s); mp = tp->t_mountp; - s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + spin_lock(&mp->m_perag[agno].pagb_lock); cnt = mp->m_perag[agno].pagb_count; uend = bno + len - 1; @@ -2615,12 +2612,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp, if (cnt) { TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); lsn = bsy->busy_tp->t_commit_lsn; - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); } else { TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); n = -1; - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); } return n; diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 1603ce595853..3ce2645508ae 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -221,7 +221,7 @@ xfs_alloc_delrec( */ bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]); agf->agf_roots[cur->bc_btnum] = *lpp; - be32_add(&agf->agf_levels[cur->bc_btnum], -1); + be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1); 
mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--; /* * Put this buffer/block on the ag's freelist. @@ -1256,9 +1256,9 @@ xfs_alloc_lshift( /* * Bump and log left's numrecs, decrement and log right's numrecs. */ - be16_add(&left->bb_numrecs, 1); + be16_add_cpu(&left->bb_numrecs, 1); xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, -1); + be16_add_cpu(&right->bb_numrecs, -1); xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); /* * Slide the contents of right down one entry. @@ -1346,7 +1346,7 @@ xfs_alloc_newroot( agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno); - be32_add(&agf->agf_levels[cur->bc_btnum], 1); + be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1); seqno = be32_to_cpu(agf->agf_seqno); mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++; xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, @@ -1558,9 +1558,9 @@ xfs_alloc_rshift( /* * Decrement and log left's numrecs, bump and log right's numrecs. */ - be16_add(&left->bb_numrecs, -1); + be16_add_cpu(&left->bb_numrecs, -1); xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); /* * Using a temporary cursor, update the parent key values of the @@ -1643,7 +1643,7 @@ xfs_alloc_split( */ if ((be16_to_cpu(left->bb_numrecs) & 1) && cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; /* * For non-leaf blocks, copy keys and addresses over to the new block. @@ -1689,7 +1689,7 @@ xfs_alloc_split( * Adjust numrecs, sibling pointers. 
*/ lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp)); - be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); + be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; left->bb_rightsib = cpu_to_be32(rbno); right->bb_leftsib = cpu_to_be32(lbno); diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index c4836890b726..f9472a2076d4 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -170,21 +170,6 @@ } \ } -static inline void be16_add(__be16 *a, __s16 b) -{ - *a = cpu_to_be16(be16_to_cpu(*a) + b); -} - -static inline void be32_add(__be32 *a, __s32 b) -{ - *a = cpu_to_be32(be32_to_cpu(*a) + b); -} - -static inline void be64_add(__be64 *a, __s64 b) -{ - *a = cpu_to_be64(be64_to_cpu(*a) + b); -} - /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 93fa64dd1be6..e58f321fdae9 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -929,7 +929,7 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) * This leaf block cannot have a "remote" value, we only call this routine * if bmap_one_block() says there is only one block (ie: no remote blks). 
*/ -int +STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args) { xfs_inode_t *dp; diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 81f45dae1c57..b08e2a2a8add 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -226,17 +226,15 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) STATIC void xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) { - unsigned long s; - if ((mp->m_flags & XFS_MOUNT_ATTR2) && !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) { - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { XFS_SB_VERSION_ADDATTR2(&mp->m_sb); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); } else - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } } @@ -319,7 +317,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); sf->hdr.count++; - be16_add(&sf->hdr.totsize, size); + be16_add_cpu(&sf->hdr.totsize, size); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); xfs_sbversion_add_attr2(mp, args->trans); @@ -365,7 +363,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) if (end != totsize) memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); sf->hdr.count--; - be16_add(&sf->hdr.totsize, -size); + be16_add_cpu(&sf->hdr.totsize, -size); /* * Fix up the start offset of the attribute fork @@ -1135,7 +1133,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); } - be16_add(&hdr->count, 1); + be16_add_cpu(&hdr->count, 1); /* * Allocate space for the new string (at the end of the run). 
@@ -1149,7 +1147,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) mp->m_sb.sb_blocksize, NULL)); ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); ASSERT((be16_to_cpu(map->size) & 0x3) == 0); - be16_add(&map->size, + be16_add_cpu(&map->size, -xfs_attr_leaf_newentsize(args->namelen, args->valuelen, mp->m_sb.sb_blocksize, &tmp)); entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) + @@ -1216,12 +1214,12 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) map = &hdr->freemap[0]; for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { if (be16_to_cpu(map->base) == tmp) { - be16_add(&map->base, sizeof(xfs_attr_leaf_entry_t)); - be16_add(&map->size, + be16_add_cpu(&map->base, sizeof(xfs_attr_leaf_entry_t)); + be16_add_cpu(&map->size, -((int)sizeof(xfs_attr_leaf_entry_t))); } } - be16_add(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); + be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); return(0); @@ -1729,9 +1727,9 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); if (be16_to_cpu(map->base) == tablesize) { - be16_add(&map->base, + be16_add_cpu(&map->base, -((int)sizeof(xfs_attr_leaf_entry_t))); - be16_add(&map->size, sizeof(xfs_attr_leaf_entry_t)); + be16_add_cpu(&map->size, sizeof(xfs_attr_leaf_entry_t)); } if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) @@ -1753,19 +1751,19 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) if ((before >= 0) || (after >= 0)) { if ((before >= 0) && (after >= 0)) { map = &hdr->freemap[before]; - be16_add(&map->size, entsize); - be16_add(&map->size, + be16_add_cpu(&map->size, entsize); + be16_add_cpu(&map->size, be16_to_cpu(hdr->freemap[after].size)); hdr->freemap[after].base = 0; hdr->freemap[after].size = 0; } else if (before >= 0) { map = 
&hdr->freemap[before]; - be16_add(&map->size, entsize); + be16_add_cpu(&map->size, entsize); } else { map = &hdr->freemap[after]; /* both on-disk, don't endian flip twice */ map->base = entry->nameidx; - be16_add(&map->size, entsize); + be16_add_cpu(&map->size, entsize); } } else { /* @@ -1790,7 +1788,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * Compress the remaining entries and zero out the removed stuff. */ memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize); - be16_add(&hdr->usedbytes, -entsize); + be16_add_cpu(&hdr->usedbytes, -entsize); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index), entsize)); @@ -1798,7 +1796,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) tmp = (be16_to_cpu(hdr->count) - args->index) * sizeof(xfs_attr_leaf_entry_t); memmove((char *)entry, (char *)(entry+1), tmp); - be16_add(&hdr->count, -1); + be16_add_cpu(&hdr->count, -1); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); entry = &leaf->entries[be16_to_cpu(hdr->count)]; @@ -2184,15 +2182,15 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, */ if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? 
*/ memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); - be16_add(&hdr_s->usedbytes, -tmp); - be16_add(&hdr_s->count, -1); + be16_add_cpu(&hdr_s->usedbytes, -tmp); + be16_add_cpu(&hdr_s->count, -1); entry_d--; /* to compensate for ++ in loop hdr */ desti--; if ((start_s + i) < offset) result++; /* insertion index adjustment */ } else { #endif /* GROT */ - be16_add(&hdr_d->firstused, -tmp); + be16_add_cpu(&hdr_d->firstused, -tmp); /* both on-disk, don't endian flip twice */ entry_d->hashval = entry_s->hashval; /* both on-disk, don't endian flip twice */ @@ -2205,10 +2203,10 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, ASSERT(be16_to_cpu(entry_s->nameidx) + tmp <= XFS_LBSIZE(mp)); memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); - be16_add(&hdr_s->usedbytes, -tmp); - be16_add(&hdr_d->usedbytes, tmp); - be16_add(&hdr_s->count, -1); - be16_add(&hdr_d->count, 1); + be16_add_cpu(&hdr_s->usedbytes, -tmp); + be16_add_cpu(&hdr_d->usedbytes, tmp); + be16_add_cpu(&hdr_s->count, -1); + be16_add_cpu(&hdr_d->count, 1); tmp = be16_to_cpu(hdr_d->count) * sizeof(xfs_attr_leaf_entry_t) + sizeof(xfs_attr_leaf_hdr_t); @@ -2249,7 +2247,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, * Fill in the freemap information */ hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); - be16_add(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * + be16_add_cpu(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * sizeof(xfs_attr_leaf_entry_t)); hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) - be16_to_cpu(hdr_d->freemap[0].base)); diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index fab0b6d5a41b..48228848f5ae 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -25,109 +25,6 @@ * XFS bit manipulation routines, used in non-realtime code. */ -#ifndef HAVE_ARCH_HIGHBIT -/* - * Index of high bit number in byte, -1 for none set, 0..7 otherwise. 
- */ -static const char xfs_highbit[256] = { - -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ - 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */ -}; -#endif - -/* - * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. - */ -inline int -xfs_highbit32( - __uint32_t v) -{ -#ifdef HAVE_ARCH_HIGHBIT - return highbit32(v); -#else - int i; - - if (v & 0xffff0000) - if (v & 0xff000000) - i = 24; - else - i = 16; - else if (v & 0x0000ffff) - if (v & 0x0000ff00) - i = 8; - else - i = 0; - else - return -1; - return i + xfs_highbit[(v >> i) & 0xff]; -#endif -} - -/* - * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. 
- */ -int -xfs_lowbit64( - __uint64_t v) -{ - __uint32_t w = (__uint32_t)v; - int n = 0; - - if (w) { /* lower bits */ - n = ffs(w); - } else { /* upper bits */ - w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; - } - return n - 1; -} - -/* - * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_highbit64( - __uint64_t v) -{ - __uint32_t h = (__uint32_t)(v >> 32); - - if (h) - return xfs_highbit32(h) + 32; - return xfs_highbit32((__uint32_t)v); -} - - /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 082641a9782c..325a007dec91 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -47,13 +47,30 @@ static inline __uint64_t xfs_mask64lo(int n) } /* Get high bit set out of 32-bit argument, -1 if none set */ -extern int xfs_highbit32(__uint32_t v); - -/* Get low bit set out of 64-bit argument, -1 if none set */ -extern int xfs_lowbit64(__uint64_t v); +static inline int xfs_highbit32(__uint32_t v) +{ + return fls(v) - 1; +} /* Get high bit set out of 64-bit argument, -1 if none set */ -extern int xfs_highbit64(__uint64_t); +static inline int xfs_highbit64(__uint64_t v) +{ + return fls64(v) - 1; +} + +/* Get low bit set out of 32-bit argument, -1 if none set */ +static inline int xfs_lowbit32(__uint32_t v) +{ + __uint32_t t = v; + return (t) ? find_first_bit((unsigned long *)&t, 32) : -1; +} + +/* Get low bit set out of 64-bit argument, -1 if none set */ +static inline int xfs_lowbit64(__uint64_t v) +{ + __uint64_t t = v; + return (t) ? 
find_first_bit((unsigned long *)&t, 64) : -1; +} /* Return whether bitmap is empty (1 == empty) */ extern int xfs_bitmap_empty(uint *map, uint size); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 2e9b34b7344b..1c0a5a585a82 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2830,11 +2830,11 @@ xfs_bmap_btalloc( args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (mp->m_sb.sb_blocksize >= NBPP) { + } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { args.prod = 1; args.mod = 0; } else { - args.prod = NBPP >> mp->m_sb.sb_blocklog; + args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } @@ -2969,7 +2969,7 @@ STATIC int xfs_bmap_alloc( xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { - if ((ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata) + if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) return xfs_bmap_rtalloc(ap); return xfs_bmap_btalloc(ap); } @@ -3096,8 +3096,7 @@ xfs_bmap_del_extent( /* * Realtime allocation. Free it and record di_nblocks update. 
*/ - if (whichfork == XFS_DATA_FORK && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { xfs_fsblock_t bno; xfs_filblks_t len; @@ -3956,7 +3955,6 @@ xfs_bmap_add_attrfork( xfs_bmap_free_t flist; /* freed extent records */ xfs_mount_t *mp; /* mount structure */ xfs_trans_t *tp; /* transaction pointer */ - unsigned long s; /* spinlock spl value */ int blks; /* space reservation */ int version = 1; /* superblock attr version */ int committed; /* xaction was committed */ @@ -4053,7 +4051,7 @@ xfs_bmap_add_attrfork( (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) { __int64_t sbfields = 0; - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) { XFS_SB_VERSION_ADDATTR(&mp->m_sb); sbfields |= XFS_SB_VERSIONNUM; @@ -4063,10 +4061,10 @@ xfs_bmap_add_attrfork( sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); } if (sbfields) { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, sbfields); } else - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } if ((error = xfs_bmap_finish(&tp, &flist, &committed))) goto error2; @@ -6394,7 +6392,7 @@ xfs_bmap_count_blocks( * Recursively walks each level of a btree * to count total fsblocks is use. */ -int /* error */ +STATIC int /* error */ xfs_bmap_count_tree( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ @@ -6470,7 +6468,7 @@ xfs_bmap_count_tree( /* * Count leaf blocks given a range of extent records. */ -int +STATIC int xfs_bmap_count_leaves( xfs_ifork_t *ifp, xfs_extnum_t idx, @@ -6490,7 +6488,7 @@ xfs_bmap_count_leaves( * Count leaf blocks given a range of extent records originally * in btree format. 
*/ -int +STATIC int xfs_bmap_disk_count_leaves( xfs_extnum_t idx, xfs_bmbt_block_t *block, diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 68267d75ff19..87224b7d7984 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -25,6 +25,8 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; +extern kmem_zone_t *xfs_bmap_free_item_zone; + /* * DELTA: describe a change to the in-core extent list. * diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 32b49ec00fb5..bd18987326a3 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -631,7 +631,7 @@ xfs_bmbt_delrec( memcpy(lrp, rrp, numrrecs * sizeof(*lrp)); xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs); } - be16_add(&left->bb_numrecs, numrrecs); + be16_add_cpu(&left->bb_numrecs, numrrecs); left->bb_rightsib = right->bb_rightsib; xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS); if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) { @@ -924,7 +924,7 @@ xfs_bmbt_killroot( xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork); block = ifp->if_broot; } - be16_add(&block->bb_numrecs, i); + be16_add_cpu(&block->bb_numrecs, i); ASSERT(block->bb_numrecs == cblock->bb_numrecs); kp = XFS_BMAP_KEY_IADDR(block, 1, cur); ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur); @@ -947,7 +947,7 @@ xfs_bmbt_killroot( XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(cur->bc_tp, cbp); cur->bc_bufs[level - 1] = NULL; - be16_add(&block->bb_level, -1); + be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); cur->bc_nlevels--; @@ -1401,9 +1401,9 @@ xfs_bmbt_rshift( key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); rkp = &key; } - be16_add(&left->bb_numrecs, -1); + be16_add_cpu(&left->bb_numrecs, -1); xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); #ifdef DEBUG if (level > 0) xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1); 
@@ -1535,7 +1535,7 @@ xfs_bmbt_split( right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); if ((be16_to_cpu(left->bb_numrecs) & 1) && cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; if (level > 0) { lkp = XFS_BMAP_KEY_IADDR(left, i, cur); @@ -1562,7 +1562,7 @@ xfs_bmbt_split( xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); *startoff = xfs_bmbt_disk_get_startoff(rrp); } - be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); + be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; left->bb_rightsib = cpu_to_be64(args.fsbno); right->bb_leftsib = cpu_to_be64(lbno); @@ -2062,8 +2062,7 @@ xfs_bmbt_insert( pcur->bc_private.b.allocated; pcur->bc_private.b.allocated = 0; ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) || - (cur->bc_private.b.ip->i_d.di_flags & - XFS_DIFLAG_REALTIME)); + XFS_IS_REALTIME_INODE(cur->bc_private.b.ip)); cur->bc_private.b.firstblock = pcur->bc_private.b.firstblock; ASSERT(cur->bc_private.b.flist == @@ -2241,7 +2240,7 @@ xfs_bmbt_newroot( bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0); cblock = XFS_BUF_TO_BMBT_BLOCK(bp); *cblock = *block; - be16_add(&block->bb_level, 1); + be16_add_cpu(&block->bb_level, 1); block->bb_numrecs = cpu_to_be16(1); cur->bc_nlevels++; cur->bc_ptrs[level + 1] = 1; diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 6e40a0a198ff..7440b78f9cec 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -24,6 +24,8 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; +extern kmem_zone_t *xfs_btree_cur_zone; + /* * This nonsense is to make -wlint happy. 
*/ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index c8f2c2886fe4..63debd147eb5 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -378,7 +378,6 @@ xfs_buf_item_unpin( xfs_mount_t *mp; xfs_buf_t *bp; int freed; - SPLDECL(s); bp = bip->bli_buf; ASSERT(bp != NULL); @@ -409,8 +408,8 @@ xfs_buf_item_unpin( XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { - AIL_LOCK(mp,s); - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s); + spin_lock(&mp->m_ail_lock); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); } @@ -1113,7 +1112,6 @@ xfs_buf_iodone( xfs_buf_log_item_t *bip) { struct xfs_mount *mp; - SPLDECL(s); ASSERT(bip->bli_buf == bp); @@ -1128,11 +1126,11 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. */ - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); #ifdef XFS_TRANS_DEBUG kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index d7e136143066..5a41c348bb1c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -18,6 +18,8 @@ #ifndef __XFS_BUF_ITEM_H__ #define __XFS_BUF_ITEM_H__ +extern kmem_zone_t *xfs_buf_item_zone; + /* * This is the structure used to lay out a buf log item in the * log. The data map describes which 128 byte chunks of the buffer diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 26d09e2e1a7f..021a8f7e563f 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -511,12 +511,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Move the req'd B-tree elements from high in node1 to * low in node2. 
*/ - be16_add(&node2->hdr.count, count); + be16_add_cpu(&node2->hdr.count, count); tmp = count * (uint)sizeof(xfs_da_node_entry_t); btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count]; btree_d = &node2->btree[0]; memcpy(btree_d, btree_s, tmp); - be16_add(&node1->hdr.count, -count); + be16_add_cpu(&node1->hdr.count, -count); } else { /* * Move the req'd B-tree elements from low in node2 to @@ -527,7 +527,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, btree_s = &node2->btree[0]; btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; memcpy(btree_d, btree_s, tmp); - be16_add(&node1->hdr.count, count); + be16_add_cpu(&node1->hdr.count, count); xfs_da_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, btree_d, tmp)); @@ -539,7 +539,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, btree_s = &node2->btree[count]; btree_d = &node2->btree[0]; memmove(btree_d, btree_s, tmp); - be16_add(&node2->hdr.count, -count); + be16_add_cpu(&node2->hdr.count, -count); } /* @@ -604,7 +604,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, btree->before = cpu_to_be32(newblk->blkno); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); - be16_add(&node->hdr.count, 1); + be16_add_cpu(&node->hdr.count, 1); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); @@ -959,7 +959,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); - be16_add(&node->hdr.count, -1); + be16_add_cpu(&node->hdr.count, -1); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); @@ -1018,7 +1018,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, */ tmp = be16_to_cpu(drop_node->hdr.count) * 
(uint)sizeof(xfs_da_node_entry_t); memcpy(btree, &drop_node->btree[0], tmp); - be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); + be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); xfs_da_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, @@ -2218,7 +2218,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef XFS_DABUF_DEBUG xfs_dabuf_t *xfs_dabuf_global_list; -lock_t xfs_dabuf_global_lock; +spinlock_t xfs_dabuf_global_lock; #endif /* @@ -2264,10 +2264,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) } #ifdef XFS_DABUF_DEBUG { - SPLDECL(s); xfs_dabuf_t *p; - s = mutex_spinlock(&xfs_dabuf_global_lock); + spin_lock(&xfs_dabuf_global_lock); for (p = xfs_dabuf_global_list; p; p = p->next) { ASSERT(p->blkno != dabuf->blkno || p->target != dabuf->target); @@ -2277,7 +2276,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) xfs_dabuf_global_list->prev = dabuf; dabuf->next = xfs_dabuf_global_list; xfs_dabuf_global_list = dabuf; - mutex_spinunlock(&xfs_dabuf_global_lock, s); + spin_unlock(&xfs_dabuf_global_lock); } #endif return dabuf; @@ -2319,16 +2318,14 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); #ifdef XFS_DABUF_DEBUG { - SPLDECL(s); - - s = mutex_spinlock(&xfs_dabuf_global_lock); + spin_lock(&xfs_dabuf_global_lock); if (dabuf->prev) dabuf->prev->next = dabuf->next; else xfs_dabuf_global_list = dabuf->next; if (dabuf->next) dabuf->next->prev = dabuf->prev; - mutex_spinunlock(&xfs_dabuf_global_lock, s); + spin_unlock(&xfs_dabuf_global_lock); } memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); #endif diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 44dabf02f2a3..7facf86f74f9 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -260,6 +260,7 @@ void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf); xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); extern struct kmem_zone *xfs_da_state_zone; +extern struct kmem_zone 
*xfs_dabuf_zone; #endif /* __KERNEL__ */ #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 584f1ae85cd9..3f53fad356a3 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -52,76 +52,72 @@ xfs_swapext( xfs_swapext_t __user *sxu) { xfs_swapext_t *sxp; - xfs_inode_t *ip=NULL, *tip=NULL; - xfs_mount_t *mp; - struct file *fp = NULL, *tfp = NULL; - bhv_vnode_t *vp, *tvp; + xfs_inode_t *ip, *tip; + struct file *file, *target_file; int error = 0; sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); if (!sxp) { error = XFS_ERROR(ENOMEM); - goto error0; + goto out; } if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) { error = XFS_ERROR(EFAULT); - goto error0; + goto out_free_sxp; } /* Pull information for the target fd */ - if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) || - ((vp = vn_from_inode(fp->f_path.dentry->d_inode)) == NULL)) { + file = fget((int)sxp->sx_fdtarget); + if (!file) { error = XFS_ERROR(EINVAL); - goto error0; + goto out_free_sxp; } - ip = xfs_vtoi(vp); - if (ip == NULL) { + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); - goto error0; + goto out_put_file; } - if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) || - ((tvp = vn_from_inode(tfp->f_path.dentry->d_inode)) == NULL)) { + target_file = fget((int)sxp->sx_fdtmp); + if (!target_file) { error = XFS_ERROR(EINVAL); - goto error0; + goto out_put_file; } - tip = xfs_vtoi(tvp); - if (tip == NULL) { + if (!(target_file->f_mode & FMODE_WRITE) || + (target_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); - goto error0; + goto out_put_target_file; } + ip = XFS_I(file->f_path.dentry->d_inode); + tip = XFS_I(target_file->f_path.dentry->d_inode); + if (ip->i_mount != tip->i_mount) { - error = XFS_ERROR(EINVAL); - goto error0; + error = XFS_ERROR(EINVAL); + goto out_put_target_file; } if (ip->i_ino == tip->i_ino) { - error = XFS_ERROR(EINVAL); - goto error0; + error = XFS_ERROR(EINVAL); + goto out_put_target_file; } 
- mp = ip->i_mount; - - if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); - goto error0; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + error = XFS_ERROR(EIO); + goto out_put_target_file; } - error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp); - - error0: - if (fp != NULL) - fput(fp); - if (tfp != NULL) - fput(tfp); - - if (sxp != NULL) - kmem_free(sxp, sizeof(xfs_swapext_t)); + error = xfs_swap_extents(ip, tip, sxp); + out_put_target_file: + fput(target_file); + out_put_file: + fput(file); + out_free_sxp: + kmem_free(sxp, sizeof(xfs_swapext_t)); + out: return error; } @@ -169,15 +165,6 @@ xfs_swap_extents( xfs_lock_inodes(ips, 2, 0, lock_flags); locked = 1; - /* Check permissions */ - error = xfs_iaccess(ip, S_IWUSR, NULL); - if (error) - goto error0; - - error = xfs_iaccess(tip, S_IWUSR, NULL); - if (error) - goto error0; - /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); @@ -185,8 +172,7 @@ xfs_swap_extents( } /* Verify both files are either real-time or non-realtime */ - if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != - (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto error0; } @@ -199,7 +185,7 @@ xfs_swap_extents( } if (VN_CACHED(tvp) != 0) { - xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); + xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); if (error) diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index dedd713574e1..c9065eaf2a4d 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -171,69 +171,35 @@ typedef enum xfs_dinode_fmt /* * Inode data & attribute fork sizes, per inode. 
*/ -#define XFS_CFORK_Q(dcp) ((dcp)->di_forkoff != 0) -#define XFS_CFORK_Q_DISK(dcp) ((dcp)->di_forkoff != 0) - -#define XFS_CFORK_BOFF(dcp) ((int)((dcp)->di_forkoff << 3)) -#define XFS_CFORK_BOFF_DISK(dcp) ((int)((dcp)->di_forkoff << 3)) - -#define XFS_CFORK_DSIZE_DISK(dcp,mp) \ - (XFS_CFORK_Q_DISK(dcp) ? XFS_CFORK_BOFF_DISK(dcp) : XFS_LITINO(mp)) -#define XFS_CFORK_DSIZE(dcp,mp) \ - (XFS_CFORK_Q(dcp) ? XFS_CFORK_BOFF(dcp) : XFS_LITINO(mp)) - -#define XFS_CFORK_ASIZE_DISK(dcp,mp) \ - (XFS_CFORK_Q_DISK(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_DISK(dcp) : 0) -#define XFS_CFORK_ASIZE(dcp,mp) \ - (XFS_CFORK_Q(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF(dcp) : 0) - -#define XFS_CFORK_SIZE_DISK(dcp,mp,w) \ - ((w) == XFS_DATA_FORK ? \ - XFS_CFORK_DSIZE_DISK(dcp, mp) : \ - XFS_CFORK_ASIZE_DISK(dcp, mp)) -#define XFS_CFORK_SIZE(dcp,mp,w) \ - ((w) == XFS_DATA_FORK ? \ - XFS_CFORK_DSIZE(dcp, mp) : XFS_CFORK_ASIZE(dcp, mp)) +#define XFS_DFORK_Q(dip) ((dip)->di_core.di_forkoff != 0) +#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_core.di_forkoff << 3)) #define XFS_DFORK_DSIZE(dip,mp) \ - XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp) -#define XFS_DFORK_DSIZE_HOST(dip,mp) \ - XFS_CFORK_DSIZE(&(dip)->di_core, mp) + (XFS_DFORK_Q(dip) ? \ + XFS_DFORK_BOFF(dip) : \ + XFS_LITINO(mp)) #define XFS_DFORK_ASIZE(dip,mp) \ - XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp) -#define XFS_DFORK_ASIZE_HOST(dip,mp) \ - XFS_CFORK_ASIZE(&(dip)->di_core, mp) -#define XFS_DFORK_SIZE(dip,mp,w) \ - XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w) -#define XFS_DFORK_SIZE_HOST(dip,mp,w) \ - XFS_CFORK_SIZE(&(dip)->di_core, mp, w) + (XFS_DFORK_Q(dip) ? \ + XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \ + 0) +#define XFS_DFORK_SIZE(dip,mp,w) \ + ((w) == XFS_DATA_FORK ? 
\ + XFS_DFORK_DSIZE(dip, mp) : \ + XFS_DFORK_ASIZE(dip, mp)) -#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core) -#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core) -#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) -#define XFS_DFORK_APTR(dip) \ +#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) +#define XFS_DFORK_APTR(dip) \ ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip)) -#define XFS_DFORK_PTR(dip,w) \ +#define XFS_DFORK_PTR(dip,w) \ ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) -#define XFS_CFORK_FORMAT(dcp,w) \ - ((w) == XFS_DATA_FORK ? (dcp)->di_format : (dcp)->di_aformat) -#define XFS_CFORK_FMT_SET(dcp,w,n) \ +#define XFS_DFORK_FORMAT(dip,w) \ ((w) == XFS_DATA_FORK ? \ - ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n))) -#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w) - -#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \ + (dip)->di_core.di_format : \ + (dip)->di_core.di_aformat) +#define XFS_DFORK_NEXTENTS(dip,w) \ ((w) == XFS_DATA_FORK ? \ - be32_to_cpu((dcp)->di_nextents) : \ - be16_to_cpu((dcp)->di_anextents)) -#define XFS_CFORK_NEXTENTS(dcp,w) \ - ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents) -#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w) -#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w) - -#define XFS_CFORK_NEXT_SET(dcp,w,n) \ - ((w) == XFS_DATA_FORK ? 
\ - ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n))) + be32_to_cpu((dip)->di_core.di_nextents) : \ + be16_to_cpu((dip)->di_core.di_anextents)) #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) @@ -273,6 +239,12 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) +#ifdef CONFIG_XFS_RT +#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) +#else +#define XFS_IS_REALTIME_INODE(ip) (0) +#endif + #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index b0f1ee8fcb90..be7c4251fa61 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -42,6 +42,7 @@ #include "xfs_dir2_node.h" #include "xfs_dir2_trace.h" #include "xfs_error.h" +#include "xfs_vnodeops.h" void @@ -301,7 +302,7 @@ xfs_readdir( int rval; /* return value */ int v; /* type-checking value */ - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index a5f4f4fb8868..fb5a556725b3 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -271,7 +271,7 @@ xfs_dir2_block_addname( } lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); lfloghigh -= be32_to_cpu(btp->stale) - 1; - be32_add(&btp->count, -(be32_to_cpu(btp->stale) - 1)); + be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), @@ -326,7 +326,7 @@ xfs_dir2_block_addname( /* * Update the tail (entry count). */ - be32_add(&btp->count, 1); + be32_add_cpu(&btp->count, 1); /* * If we now need to rebuild the bestfree map, do so. 
* This needs to happen before the next call to use_free. @@ -387,7 +387,7 @@ xfs_dir2_block_addname( lfloglow = MIN(mid, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - be32_add(&btp->stale, -1); + be32_add_cpu(&btp->stale, -1); } /* * Point to the new data entry. @@ -767,7 +767,7 @@ xfs_dir2_block_removename( /* * Fix up the block tail. */ - be32_add(&btp->stale, 1); + be32_add_cpu(&btp->stale, 1); xfs_dir2_block_log_tail(tp, bp); /* * Remove the leaf entry by marking it stale. diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index d2452699e9b1..fb8c9e08b23d 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -587,7 +587,7 @@ xfs_dir2_data_make_free( /* * Fix up the new big freespace. */ - be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); + be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); @@ -621,7 +621,7 @@ xfs_dir2_data_make_free( */ else if (prevdup) { dfp = xfs_dir2_data_freefind(d, prevdup); - be16_add(&prevdup->length, len); + be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 0ca0020ba09f..bc52b803d79b 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -359,7 +359,7 @@ xfs_dir2_leaf_addname( bestsp--; memmove(&bestsp[0], &bestsp[1], be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); - be32_add(&ltp->bestcount, 1); + be32_add_cpu(&ltp->bestcount, 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); } @@ -445,7 +445,7 @@ xfs_dir2_leaf_addname( */ lfloglow = index; lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add(&leaf->hdr.count, 1); + be16_add_cpu(&leaf->hdr.count, 1); } /* * There are stale entries.
@@ -523,7 +523,7 @@ xfs_dir2_leaf_addname( lfloglow = MIN(index, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - be16_add(&leaf->hdr.stale, -1); + be16_add_cpu(&leaf->hdr.stale, -1); } /* * Fill in the new leaf entry. @@ -626,7 +626,7 @@ xfs_dir2_leaf_compact( * Update and log the header, log the leaf entries. */ ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to); - be16_add(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); + be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); leaf->hdr.stale = 0; xfs_dir2_leaf_log_header(args->trans, bp); if (loglow != -1) @@ -728,7 +728,7 @@ xfs_dir2_leaf_compact_x1( /* * Adjust the leaf header values. */ - be16_add(&leaf->hdr.count, -(from - to)); + be16_add_cpu(&leaf->hdr.count, -(from - to)); leaf->hdr.stale = cpu_to_be16(1); /* * Remember the low/high stale value only in the "right" @@ -1470,7 +1470,7 @@ xfs_dir2_leaf_removename( /* * We just mark the leaf entry stale by putting a null in it. */ - be16_add(&leaf->hdr.stale, 1); + be16_add_cpu(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, lbp, index, index); @@ -1531,7 +1531,7 @@ xfs_dir2_leaf_removename( */ memmove(&bestsp[db - i], bestsp, (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); - be32_add(&ltp->bestcount, -(db - i)); + be32_add_cpu(&ltp->bestcount, -(db - i)); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); } else @@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data( * Eliminate the last bests entry from the table.
*/ bestsp = xfs_dir2_leaf_bests_p(ltp); - be32_add(&ltp->bestcount, -1); + be32_add_cpu(&ltp->bestcount, -1); memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index eb18e399e836..8dade711f099 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -254,7 +254,7 @@ xfs_dir2_leafn_add( (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); lfloglow = index; lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add(&leaf->hdr.count, 1); + be16_add_cpu(&leaf->hdr.count, 1); } /* * There are stale entries. We'll use one for the new entry. @@ -322,7 +322,7 @@ xfs_dir2_leafn_add( lfloglow = MIN(index, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - be16_add(&leaf->hdr.stale, -1); + be16_add_cpu(&leaf->hdr.stale, -1); } /* * Insert the new entry, log everything. @@ -697,10 +697,10 @@ xfs_dir2_leafn_moveents( /* * Update the headers and log them. */ - be16_add(&leaf_s->hdr.count, -(count)); - be16_add(&leaf_s->hdr.stale, -(stale)); - be16_add(&leaf_d->hdr.count, count); - be16_add(&leaf_d->hdr.stale, stale); + be16_add_cpu(&leaf_s->hdr.count, -(count)); + be16_add_cpu(&leaf_s->hdr.stale, -(stale)); + be16_add_cpu(&leaf_d->hdr.count, count); + be16_add_cpu(&leaf_d->hdr.stale, stale); xfs_dir2_leaf_log_header(tp, bp_s); xfs_dir2_leaf_log_header(tp, bp_d); xfs_dir2_leafn_check(args->dp, bp_s); @@ -885,7 +885,7 @@ xfs_dir2_leafn_remove( * Kill the leaf entry by marking it stale. * Log the leaf block changes. */ - be16_add(&leaf->hdr.stale, 1); + be16_add_cpu(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, bp, index, index); @@ -971,7 +971,7 @@ xfs_dir2_leafn_remove( /* * One less used entry in the free table.
*/ - be32_add(&free->hdr.nused, -1); + be32_add_cpu(&free->hdr.nused, -1); xfs_dir2_free_log_header(tp, fbp); /* * If this was the last entry in the table, we can @@ -1642,7 +1642,7 @@ xfs_dir2_node_addname_int( * (this should always be true) then update the header. */ if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { - be32_add(&free->hdr.nused, 1); + be32_add_cpu(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } /* diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index a4634d94e561..05e5365d3c31 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -230,37 +230,6 @@ xfs_error_report( } } -STATIC void -xfs_hex_dump(void *p, int length) -{ - __uint8_t *uip = (__uint8_t*)p; - int i; - char sbuf[128], *s; - - s = sbuf; - *s = '\0'; - for (i=0; i<length; i++, uip++) { - if ((i % 16) == 0) { - if (*s != '\0') - cmn_err(CE_ALERT, "%s\n", sbuf); - s = sbuf; - sprintf(s, "0x%x: ", i); - while( *s != '\0') - s++; - } - sprintf(s, "%02x ", *uip); - - /* - * the kernel sprintf is a void; user sprintf returns - * the sprintf'ed string's length. Find the new end- - * of-string - */ - while( *s != '\0') - s++; - } - cmn_err(CE_ALERT, "%s\n", sbuf); -} - void xfs_corruption_error( char *tag, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 10e9d9619ae5..6490d2a9f8e1 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -174,6 +174,8 @@ extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, /* PRINTFLIKE3 */ extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); +extern void xfs_hex_dump(void *p, int length); + #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) 
\ xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f938a51be81b..132bd07b9bb8 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -110,19 +110,18 @@ STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { xfs_mount_t *mp; - SPLDECL(s); mp = efip->efi_item.li_mountp; - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } @@ -138,10 +137,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { xfs_mount_t *mp; xfs_log_item_desc_t *lidp; - SPLDECL(s); mp = efip->efi_item.li_mountp; - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * free the xaction descriptor pointing to this item @@ -152,11 +150,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) * pull the item off the AIL. * xfs_trans_delete_ail() drops the AIL lock. 
*/ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } @@ -350,13 +348,12 @@ xfs_efi_release(xfs_efi_log_item_t *efip, { xfs_mount_t *mp; int extents_left; - SPLDECL(s); mp = efip->efi_item.li_mountp; ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); ASSERT(efip->efi_next_extent >= nextents); efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; @@ -364,10 +361,10 @@ xfs_efi_release(xfs_efi_log_item_t *efip, /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 36d8f6aa11af..eb03eab5ca52 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -348,7 +348,7 @@ _xfs_filestream_update_ag( } /* xfs_fstrm_free_func(): callback for freeing cached stream items. 
*/ -void +STATIC void xfs_fstrm_free_func( unsigned long ino, void *data) diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index aab966276517..3bed6433d050 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -419,9 +419,13 @@ typedef struct xfs_handle { /* * ioctl commands that are used by Linux filesystems */ -#define XFS_IOC_GETXFLAGS _IOR('f', 1, long) -#define XFS_IOC_SETXFLAGS _IOW('f', 2, long) -#define XFS_IOC_GETVERSION _IOR('v', 1, long) +#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS +#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS +#define XFS_IOC_GETVERSION FS_IOC_GETVERSION +/* 32-bit compat counterparts */ +#define XFS_IOC32_GETXFLAGS FS_IOC32_GETFLAGS +#define XFS_IOC32_SETXFLAGS FS_IOC32_SETFLAGS +#define XFS_IOC32_GETVERSION FS_IOC32_GETVERSION /* * ioctl commands that replace IRIX fcntl()'s diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index c92d5b821029..eadc1591c795 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -318,7 +318,7 @@ xfs_growfs_data_private( } ASSERT(bp); agi = XFS_BUF_TO_AGI(bp); - be32_add(&agi->agi_length, new); + be32_add_cpu(&agi->agi_length, new); ASSERT(nagcount == oagcount || be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks); xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); @@ -331,7 +331,7 @@ xfs_growfs_data_private( } ASSERT(bp); agf = XFS_BUF_TO_AGF(bp); - be32_add(&agf->agf_length, new); + be32_add_cpu(&agf->agf_length, new); ASSERT(be32_to_cpu(agf->agf_length) == be32_to_cpu(agi->agi_length)); xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); @@ -462,15 +462,13 @@ xfs_fs_counts( xfs_mount_t *mp, xfs_fsop_counts_t *cnt) { - unsigned long s; - xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); cnt->freertx = mp->m_sb.sb_frextents; cnt->freeino = mp->m_sb.sb_ifree; cnt->allocino = mp->m_sb.sb_icount; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); return 0; } @@ -497,7 +495,6 @@ xfs_reserve_blocks( 
{ __int64_t lcounter, delta, fdblks_delta; __uint64_t request; - unsigned long s; /* If inval is null, report current values and return */ if (inval == (__uint64_t *)NULL) { @@ -515,7 +512,7 @@ xfs_reserve_blocks( * problem. we needto work out if we are freeing or allocation * blocks first, then we can do the modification as necessary. * - * We do this under the XFS_SB_LOCK so that if we are near + * We do this under the m_sb_lock so that if we are near * ENOSPC, we will hold out any changes while we work out * what to do. This means that the amount of free space can * change while we do this, so we need to retry if we end up @@ -526,7 +523,7 @@ xfs_reserve_blocks( * enabled, disabled or even compiled in.... */ retry: - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); /* @@ -569,7 +566,7 @@ out: outval->resblks = mp->m_resblks; outval->resblks_avail = mp->m_resblks_avail; } - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); if (fdblks_delta) { /* diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 1409c2d61c11..c5836b951d0c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -301,8 +301,8 @@ xfs_ialloc_ag_alloc( } xfs_trans_inode_alloc_buf(tp, fbuf); } - be32_add(&agi->agi_count, newlen); - be32_add(&agi->agi_freecount, newlen); + be32_add_cpu(&agi->agi_count, newlen); + be32_add_cpu(&agi->agi_freecount, newlen); agno = be32_to_cpu(agi->agi_seqno); down_read(&args.mp->m_peraglock); args.mp->m_perag[agno].pagi_freecount += newlen; @@ -885,7 +885,7 @@ nextag: if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) goto error0; - be32_add(&agi->agi_freecount, -1); + be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[tagno].pagi_freecount--; @@ -1065,8 +1065,8 @@ xfs_difree( * to be freed when the transaction is committed. 
*/ ilen = XFS_IALLOC_INODES(mp); - be32_add(&agi->agi_count, -ilen); - be32_add(&agi->agi_freecount, -(ilen - 1)); + be32_add_cpu(&agi->agi_count, -ilen); + be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[agno].pagi_freecount -= ilen - 1; @@ -1095,7 +1095,7 @@ xfs_difree( /* * Change the inode free counts and log the ag/sb changes. */ - be32_add(&agi->agi_freecount, 1); + be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[agno].pagi_freecount++; diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 8cdeeaf8632b..e5310c90e50f 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -189,7 +189,7 @@ xfs_inobt_delrec( */ bno = be32_to_cpu(agi->agi_root); agi->agi_root = *pp; - be32_add(&agi->agi_level, -1); + be32_add_cpu(&agi->agi_level, -1); /* * Free the block. */ @@ -1132,7 +1132,7 @@ xfs_inobt_lshift( /* * Bump and log left's numrecs, decrement and log right's numrecs. */ - be16_add(&left->bb_numrecs, 1); + be16_add_cpu(&left->bb_numrecs, 1); xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); #ifdef DEBUG if (level > 0) @@ -1140,7 +1140,7 @@ xfs_inobt_lshift( else xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); #endif - be16_add(&right->bb_numrecs, -1); + be16_add_cpu(&right->bb_numrecs, -1); xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); /* * Slide the contents of right down one entry. @@ -1232,7 +1232,7 @@ xfs_inobt_newroot( * Set the root data in the a.g. inode structure. */ agi->agi_root = cpu_to_be32(args.agbno); - be32_add(&agi->agi_level, 1); + be32_add_cpu(&agi->agi_level, 1); xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); /* @@ -1426,9 +1426,9 @@ xfs_inobt_rshift( /* * Decrement and log left's numrecs, bump and log right's numrecs. 
*/ - be16_add(&left->bb_numrecs, -1); + be16_add_cpu(&left->bb_numrecs, -1); xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); #ifdef DEBUG if (level > 0) xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); @@ -1529,7 +1529,7 @@ xfs_inobt_split( */ if ((be16_to_cpu(left->bb_numrecs) & 1) && cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; /* * For non-leaf blocks, copy keys and addresses over to the new block. @@ -1565,7 +1565,7 @@ xfs_inobt_split( * Find the left block number by looking in the buffer. * Adjust numrecs, sibling pointers. */ - be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); + be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; left->bb_rightsib = cpu_to_be32(args.agbno); right->bb_leftsib = cpu_to_be32(lbno); diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index bf8e9aff272e..8efc4a5b8b92 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -81,8 +81,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) #define XFS_INOBT_IS_FREE(rp,i) \ (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) -#define XFS_INOBT_IS_FREE_DISK(rp,i) \ - ((be64_to_cpu((rp)->ir_free) & XFS_INOBT_MASK(i)) != 0) #define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i)) #define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index fb69ef180b27..f01b07687faf 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -65,7 +65,7 @@ */ STATIC int xfs_iget_core( - bhv_vnode_t *vp, + struct inode *inode, xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, @@ -74,9 +74,9 @@ xfs_iget_core( xfs_inode_t **ipp, xfs_daddr_t bno) { + 
struct inode *old_inode; xfs_inode_t *ip; xfs_inode_t *iq; - bhv_vnode_t *inode_vp; int error; xfs_icluster_t *icl, *new_icl = NULL; unsigned long first_index, mask; @@ -111,8 +111,8 @@ again: goto again; } - inode_vp = XFS_ITOV_NULL(ip); - if (inode_vp == NULL) { + old_inode = ip->i_vnode; + if (old_inode == NULL) { /* * If IRECLAIM is set this inode is * on its way out of the system, @@ -140,28 +140,9 @@ again: return ENOENT; } - /* - * There may be transactions sitting in the - * incore log buffers or being flushed to disk - * at this time. We can't clear the - * XFS_IRECLAIMABLE flag until these - * transactions have hit the disk, otherwise we - * will void the guarantee the flag provides - * xfs_iunpin() - */ - if (xfs_ipincount(ip)) { - read_unlock(&pag->pag_ici_lock); - xfs_log_force(mp, 0, - XFS_LOG_FORCE|XFS_LOG_SYNC); - XFS_STATS_INC(xs_ig_frecycle); - goto again; - } - - vn_trace_exit(ip, "xfs_iget.alloc", - (inst_t *)__return_address); + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); XFS_STATS_INC(xs_ig_found); - xfs_iflags_clear(ip, XFS_IRECLAIMABLE); read_unlock(&pag->pag_ici_lock); @@ -171,13 +152,11 @@ again: goto finish_inode; - } else if (vp != inode_vp) { - struct inode *inode = vn_to_inode(inode_vp); - + } else if (inode != old_inode) { /* The inode is being torn down, pause and * try again. 
*/ - if (inode->i_state & (I_FREEING | I_CLEAR)) { + if (old_inode->i_state & (I_FREEING | I_CLEAR)) { read_unlock(&pag->pag_ici_lock); delay(1); XFS_STATS_INC(xs_ig_frecycle); @@ -190,7 +169,7 @@ again: */ cmn_err(CE_PANIC, "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - inode_vp, vp); + old_inode, inode); } /* @@ -200,20 +179,16 @@ again: XFS_STATS_INC(xs_ig_found); finish_inode: - if (ip->i_d.di_mode == 0) { - if (!(flags & XFS_IGET_CREATE)) { - xfs_put_perag(mp, pag); - return ENOENT; - } - xfs_iocore_inode_reinit(ip); + if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { + xfs_put_perag(mp, pag); + return ENOENT; } if (lock_flags != 0) xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE); - vn_trace_exit(ip, "xfs_iget.found", - (inst_t *)__return_address); + xfs_itrace_exit_tag(ip, "xfs_iget.found"); goto return_ip; } @@ -234,10 +209,16 @@ finish_inode: return error; } - vn_trace_exit(ip, "xfs_iget.alloc", (inst_t *)__return_address); + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); + + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + init_waitqueue_head(&ip->i_ipin_wait); + atomic_set(&ip->i_pincount, 0); + initnsema(&ip->i_flock, 1, "xfsfino"); - xfs_inode_lock_init(ip, vp); - xfs_iocore_inode_init(ip); if (lock_flags) xfs_ilock(ip, lock_flags); @@ -333,9 +314,6 @@ finish_inode: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); - ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == - ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; @@ -343,7 +321,7 @@ finish_inode: * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. 
*/ - xfs_initialize_vnode(mp, vp, ip); + xfs_initialize_vnode(mp, inode, ip); return 0; } @@ -363,69 +341,58 @@ xfs_iget( xfs_daddr_t bno) { struct inode *inode; - bhv_vnode_t *vp = NULL; + xfs_inode_t *ip; int error; XFS_STATS_INC(xs_ig_attempts); retry: inode = iget_locked(mp->m_super, ino); - if (inode) { - xfs_inode_t *ip; - - vp = vn_from_inode(inode); - if (inode->i_state & I_NEW) { - vn_initialize(inode); - error = xfs_iget_core(vp, mp, tp, ino, flags, - lock_flags, ipp, bno); - if (error) { - vn_mark_bad(vp); - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - iput(inode); - } - } else { - /* - * If the inode is not fully constructed due to - * filehandle mismatches wait for the inode to go - * away and try again. - * - * iget_locked will call __wait_on_freeing_inode - * to wait for the inode to go away. - */ - if (is_bad_inode(inode) || - ((ip = xfs_vtoi(vp)) == NULL)) { - iput(inode); - delay(1); - goto retry; - } - - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); - XFS_STATS_INC(xs_ig_found); - *ipp = ip; - error = 0; + if (!inode) + /* If we got no inode we are out of memory */ + return ENOMEM; + + if (inode->i_state & I_NEW) { + XFS_STATS_INC(vn_active); + XFS_STATS_INC(vn_alloc); + + error = xfs_iget_core(inode, mp, tp, ino, flags, + lock_flags, ipp, bno); + if (error) { + make_bad_inode(inode); + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + iput(inode); } - } else - error = ENOMEM; /* If we got no inode we are out of memory */ + return error; + } - return error; -} + /* + * If the inode is not fully constructed due to + * filehandle mismatches wait for the inode to go + * away and try again. + * + * iget_locked will call __wait_on_freeing_inode + * to wait for the inode to go away. + */ + if (is_bad_inode(inode)) { + iput(inode); + delay(1); + goto retry; + } -/* - * Do the setup for the various locks within the incore inode. 
- */ -void -xfs_inode_lock_init( - xfs_inode_t *ip, - bhv_vnode_t *vp) -{ - mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - init_waitqueue_head(&ip->i_ipin_wait); - atomic_set(&ip->i_pincount, 0); - initnsema(&ip->i_flock, 1, "xfsfino"); + ip = XFS_I(inode); + if (!ip) { + iput(inode); + delay(1); + goto retry; + } + + if (lock_flags != 0) + xfs_ilock(ip, lock_flags); + XFS_STATS_INC(xs_ig_found); + *ipp = ip; + return 0; } /* @@ -465,11 +432,9 @@ void xfs_iput(xfs_inode_t *ip, uint lock_flags) { - bhv_vnode_t *vp = XFS_ITOV(ip); - - vn_trace_entry(ip, "xfs_iput", (inst_t *)__return_address); + xfs_itrace_entry(ip); xfs_iunlock(ip, lock_flags); - VN_RELE(vp); + IRELE(ip); } /* @@ -479,20 +444,19 @@ void xfs_iput_new(xfs_inode_t *ip, uint lock_flags) { - bhv_vnode_t *vp = XFS_ITOV(ip); - struct inode *inode = vn_to_inode(vp); + struct inode *inode = ip->i_vnode; - vn_trace_entry(ip, "xfs_iput_new", (inst_t *)__return_address); + xfs_itrace_entry(ip); if ((ip->i_d.di_mode == 0)) { ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - vn_mark_bad(vp); + make_bad_inode(inode); } if (inode->i_state & I_NEW) unlock_new_inode(inode); if (lock_flags) xfs_iunlock(ip, lock_flags); - VN_RELE(vp); + IRELE(ip); } @@ -505,8 +469,6 @@ xfs_iput_new(xfs_inode_t *ip, void xfs_ireclaim(xfs_inode_t *ip) { - bhv_vnode_t *vp; - /* * Remove from old hash list and mount list. */ @@ -535,9 +497,8 @@ xfs_ireclaim(xfs_inode_t *ip) /* * Pull our behavior descriptor from the vnode chain. 
*/ - vp = XFS_ITOV_NULL(ip); - if (vp) { - vn_to_inode(vp)->i_private = NULL; + if (ip->i_vnode) { + ip->i_vnode->i_private = NULL; ip->i_vnode = NULL; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 344948082819..a550546a7083 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -15,6 +15,8 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/log2.h> + #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -826,15 +828,17 @@ xfs_ip2xflags( xfs_icdinode_t *dic = &ip->i_d; return _xfs_dic2xflags(dic->di_flags) | - (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); + (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); } uint xfs_dic2xflags( - xfs_dinode_core_t *dic) + xfs_dinode_t *dip) { + xfs_dinode_core_t *dic = &dip->di_core; + return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) | - (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); + (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); } /* @@ -884,8 +888,8 @@ xfs_iread( * Initialize inode's trace buffers. * Do this before xfs_iformat in case it adds entries. 
*/ -#ifdef XFS_VNODE_TRACE - ip->i_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); +#ifdef XFS_INODE_TRACE + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); #endif #ifdef XFS_BMAP_TRACE ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); @@ -1220,10 +1224,8 @@ xfs_ialloc( ip->i_d.di_extsize = pip->i_d.di_extsize; } } else if ((mode & S_IFMT) == S_IFREG) { - if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_REALTIME; - ip->i_iocore.io_flags |= XFS_IOCORE_RT; - } if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { di_flags |= XFS_DIFLAG_EXTSIZE; ip->i_d.di_extsize = pip->i_d.di_extsize; @@ -1298,7 +1300,10 @@ xfs_isize_check( if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) return; - if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE)) + if (XFS_IS_REALTIME_INODE(ip)) + return; + + if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) return; nimaps = 2; @@ -1711,7 +1716,7 @@ xfs_itruncate_finish( * runs. */ XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, + error = xfs_bunmapi(ntp, ip, first_unmap_block, unmap_len, XFS_BMAPI_AFLAG(fork) | (sync ? 0 : XFS_BMAPI_ASYNC), @@ -1844,8 +1849,6 @@ xfs_igrow_start( xfs_fsize_t new_size, cred_t *credp) { - int error; - ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); ASSERT(new_size > ip->i_size); @@ -1855,9 +1858,7 @@ xfs_igrow_start( * xfs_write_file() beyond the end of the file * and any blocks between the old and new file sizes. */ - error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, - ip->i_size); - return error; + return xfs_zero_eof(ip, new_size, ip->i_size); } /* @@ -1959,24 +1960,6 @@ xfs_iunlink( ASSERT(agi->agi_unlinked[bucket_index]); ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); - if (error) - return error; - - /* - * Clear the on-disk di_nlink. 
This is to prevent xfs_bulkstat - * from picking up this inode when it is reclaimed (its incore state - * initialzed but not flushed to disk yet). The in-core di_nlink is - * already cleared in xfs_droplink() and a corresponding transaction - * logged. The hack here just synchronizes the in-core to on-disk - * di_nlink value in advance before the actual inode sync to disk. - * This is OK because the inode is already unlinked and would never - * change its di_nlink again for this inode generation. - * This is a temporary hack that would require a proper fix - * in the future. - */ - dip->di_core.di_nlink = 0; - if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { /* * There is already another inode in the bucket we need @@ -1984,6 +1967,10 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); + if (error) + return error; + ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); /* both on-disk, don't endian flip twice */ dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; @@ -2209,7 +2196,6 @@ xfs_ifree_cluster( xfs_inode_log_item_t *iip; xfs_log_item_t *lip; xfs_perag_t *pag = xfs_get_perag(mp, inum); - SPLDECL(s); if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { blks_per_cluster = 1; @@ -2311,9 +2297,9 @@ xfs_ifree_cluster( iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); iip->ili_flush_lsn = iip->ili_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } @@ -2334,9 +2320,9 @@ xfs_ifree_cluster( iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); iip->ili_flush_lsn = iip->ili_item.li_lsn; - AIL_UNLOCK(mp, s); + 
spin_unlock(&mp->m_ail_lock); xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) @@ -2374,6 +2360,8 @@ xfs_ifree( int error; int delete; xfs_ino_t first_ino; + xfs_dinode_t *dip; + xfs_buf_t *ibp; ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ASSERT(ip->i_transp == tp); @@ -2409,8 +2397,27 @@ xfs_ifree( * by reincarnations of this inode. */ ip->i_d.di_gen++; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); + if (error) + return error; + + /* + * Clear the on-disk di_mode. This is to prevent xfs_bulkstat + * from picking up this inode when it is reclaimed (its incore state + * initialzed but not flushed to disk yet). The in-core di_mode is + * already cleared and a corresponding transaction logged. + * The hack here just synchronizes the in-core to on-disk + * di_mode value in advance before the actual inode sync to disk. + * This is OK because the inode is already unlinked and would never + * change its di_mode again for this inode generation. + * This is a temporary hack that would require a proper fix + * in the future. 
+ */ + dip->di_core.di_mode = 0; + if (delete) { xfs_ifree_cluster(ip, tp, first_ino); } @@ -2735,7 +2742,6 @@ void xfs_idestroy( xfs_inode_t *ip) { - switch (ip->i_d.di_mode & S_IFMT) { case S_IFREG: case S_IFDIR: @@ -2749,7 +2755,7 @@ xfs_idestroy( mrfree(&ip->i_iolock); freesema(&ip->i_flock); -#ifdef XFS_VNODE_TRACE +#ifdef XFS_INODE_TRACE ktrace_free(ip->i_trace); #endif #ifdef XFS_BMAP_TRACE @@ -2775,16 +2781,15 @@ xfs_idestroy( */ xfs_mount_t *mp = ip->i_mount; xfs_log_item_t *lip = &ip->i_itemp->ili_item; - int s; ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_delete_ail(mp, lip, s); + xfs_trans_delete_ail(mp, lip); else - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } xfs_inode_item_destroy(ip); } @@ -2816,40 +2821,8 @@ xfs_iunpin( { ASSERT(atomic_read(&ip->i_pincount) > 0); - if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { - - /* - * If the inode is currently being reclaimed, the link between - * the bhv_vnode and the xfs_inode will be broken after the - * XFS_IRECLAIM* flag is set. Hence, if these flags are not - * set, then we can move forward and mark the linux inode dirty - * knowing that it is still valid as it won't freed until after - * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The - * i_flags_lock is used to synchronise the setting of the - * XFS_IRECLAIM* flags and the breaking of the link, and so we - * can execute atomically w.r.t to reclaim by holding this lock - * here. - * - * However, we still need to issue the unpin wakeup call as the - * inode reclaim may be blocked waiting for the inode to become - * unpinned. 
- */ - - if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { - bhv_vnode_t *vp = XFS_ITOV_NULL(ip); - struct inode *inode = NULL; - - BUG_ON(vp == NULL); - inode = vn_to_inode(vp); - BUG_ON(inode->i_state & I_CLEAR); - - /* make sync come back and flush this inode */ - if (!(inode->i_state & (I_NEW|I_FREEING))) - mark_inode_dirty_sync(inode); - } - spin_unlock(&ip->i_flags_lock); + if (atomic_dec_and_test(&ip->i_pincount)) wake_up(&ip->i_ipin_wait); - } } /* @@ -3338,7 +3311,6 @@ xfs_iflush_int( #ifdef XFS_TRANS_DEBUG int first; #endif - SPLDECL(s); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); ASSERT(issemalocked(&(ip->i_flock))); @@ -3533,9 +3505,9 @@ xfs_iflush_int( iip->ili_logged = 1; ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); iip->ili_flush_lsn = iip->ili_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); /* * Attach the function xfs_iflush_done to the inode's @@ -3611,95 +3583,6 @@ xfs_iflush_all( XFS_MOUNT_IUNLOCK(mp); } -/* - * xfs_iaccess: check accessibility of inode for mode. - */ -int -xfs_iaccess( - xfs_inode_t *ip, - mode_t mode, - cred_t *cr) -{ - int error; - mode_t orgmode = mode; - struct inode *inode = vn_to_inode(XFS_ITOV(ip)); - - if (mode & S_IWUSR) { - umode_t imode = inode->i_mode; - - if (IS_RDONLY(inode) && - (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode))) - return XFS_ERROR(EROFS); - - if (IS_IMMUTABLE(inode)) - return XFS_ERROR(EACCES); - } - - /* - * If there's an Access Control List it's used instead of - * the mode bits. - */ - if ((error = _ACL_XFS_IACCESS(ip, mode, cr)) != -1) - return error ? XFS_ERROR(error) : 0; - - if (current_fsuid(cr) != ip->i_d.di_uid) { - mode >>= 3; - if (!in_group_p((gid_t)ip->i_d.di_gid)) - mode >>= 3; - } - - /* - * If the DACs are ok we don't need any capability check. - */ - if ((ip->i_d.di_mode & mode) == mode) - return 0; - /* - * Read/write DACs are always overridable. 
- * Executable DACs are overridable if at least one exec bit is set. - */ - if (!(orgmode & S_IXUSR) || - (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) - if (capable_cred(cr, CAP_DAC_OVERRIDE)) - return 0; - - if ((orgmode == S_IRUSR) || - (S_ISDIR(inode->i_mode) && (!(orgmode & S_IWUSR)))) { - if (capable_cred(cr, CAP_DAC_READ_SEARCH)) - return 0; -#ifdef NOISE - cmn_err(CE_NOTE, "Ick: mode=%o, orgmode=%o", mode, orgmode); -#endif /* NOISE */ - return XFS_ERROR(EACCES); - } - return XFS_ERROR(EACCES); -} - -/* - * xfs_iroundup: round up argument to next power of two - */ -uint -xfs_iroundup( - uint v) -{ - int i; - uint m; - - if ((v & (v - 1)) == 0) - return v; - ASSERT((v & 0x80000000) == 0); - if ((v & (v + 1)) == 0) - return v + 1; - for (i = 0, m = 1; i < 31; i++, m <<= 1) { - if (v & m) - continue; - v |= m; - if ((v & (v + 1)) == 0) - return v + 1; - } - ASSERT(0); - return( 0 ); -} - #ifdef XFS_ILOCK_TRACE ktrace_t *xfs_ilock_trace_buf; @@ -4206,7 +4089,7 @@ xfs_iext_realloc_direct( return; } if (!is_power_of_2(new_size)){ - rnew_size = xfs_iroundup(new_size); + rnew_size = roundup_pow_of_two(new_size); } if (rnew_size != ifp->if_real_bytes) { ifp->if_u1.if_extents = @@ -4229,7 +4112,7 @@ xfs_iext_realloc_direct( else { new_size += ifp->if_bytes; if (!is_power_of_2(new_size)) { - rnew_size = xfs_iroundup(new_size); + rnew_size = roundup_pow_of_two(new_size); } xfs_iext_inline_to_direct(ifp, rnew_size); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index e5aff929cc65..bfcd72cbaeea 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -132,45 +132,6 @@ typedef struct dm_attrs_s { __uint16_t da_pad; /* DMIG extra padding */ } dm_attrs_t; -typedef struct xfs_iocore { - void *io_obj; /* pointer to container - * inode or dcxvn structure */ - struct xfs_mount *io_mount; /* fs mount struct ptr */ -#ifdef DEBUG - mrlock_t *io_lock; /* inode IO lock */ - mrlock_t *io_iolock; /* inode IO lock */ -#endif - - /* I/O state */ - xfs_fsize_t 
io_new_size; /* sz when write completes */ - - /* Miscellaneous state. */ - unsigned int io_flags; /* IO related flags */ - - /* DMAPI state */ - dm_attrs_t io_dmattrs; - -} xfs_iocore_t; - -#define io_dmevmask io_dmattrs.da_dmevmask -#define io_dmstate io_dmattrs.da_dmstate - -#define XFS_IO_INODE(io) ((xfs_inode_t *) ((io)->io_obj)) -#define XFS_IO_DCXVN(io) ((dcxvn_t *) ((io)->io_obj)) - -/* - * Flags in the flags field - */ - -#define XFS_IOCORE_RT 0x1 - -/* - * xfs_iocore prototypes - */ - -extern void xfs_iocore_inode_init(struct xfs_inode *); -extern void xfs_iocore_inode_reinit(struct xfs_inode *); - /* * This is the xfs inode cluster structure. This structure is used by * xfs_iflush to find inodes that share a cluster and can be flushed to disk at @@ -181,7 +142,7 @@ typedef struct xfs_icluster { xfs_daddr_t icl_blkno; /* starting block number of * the cluster */ struct xfs_buf *icl_buf; /* the inode buffer */ - lock_t icl_lock; /* inode list lock */ + spinlock_t icl_lock; /* inode list lock */ } xfs_icluster_t; /* @@ -283,9 +244,6 @@ typedef struct xfs_inode { struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ struct xfs_inode *i_release; /* inode to unref */ #endif - /* I/O state */ - xfs_iocore_t i_iocore; /* I/O core */ - /* Miscellaneous state. */ unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ @@ -298,9 +256,10 @@ typedef struct xfs_inode { struct hlist_node i_cnode; /* cluster link node */ xfs_fsize_t i_size; /* in-memory size */ + xfs_fsize_t i_new_size; /* size when write completes */ atomic_t i_iocount; /* outstanding I/O count */ /* Trace buffers per inode. */ -#ifdef XFS_VNODE_TRACE +#ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ #endif #ifdef XFS_BMAP_TRACE @@ -382,17 +341,42 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) /* * Fork handling. */ -#define XFS_IFORK_PTR(ip,w) \ - ((w) == XFS_DATA_FORK ? 
&(ip)->i_df : (ip)->i_afp) -#define XFS_IFORK_Q(ip) XFS_CFORK_Q(&(ip)->i_d) -#define XFS_IFORK_DSIZE(ip) XFS_CFORK_DSIZE(&ip->i_d, ip->i_mount) -#define XFS_IFORK_ASIZE(ip) XFS_CFORK_ASIZE(&ip->i_d, ip->i_mount) -#define XFS_IFORK_SIZE(ip,w) XFS_CFORK_SIZE(&ip->i_d, ip->i_mount, w) -#define XFS_IFORK_FORMAT(ip,w) XFS_CFORK_FORMAT(&ip->i_d, w) -#define XFS_IFORK_FMT_SET(ip,w,n) XFS_CFORK_FMT_SET(&ip->i_d, w, n) -#define XFS_IFORK_NEXTENTS(ip,w) XFS_CFORK_NEXTENTS(&ip->i_d, w) -#define XFS_IFORK_NEXT_SET(ip,w,n) XFS_CFORK_NEXT_SET(&ip->i_d, w, n) +#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) +#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) + +#define XFS_IFORK_PTR(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + &(ip)->i_df : \ + (ip)->i_afp) +#define XFS_IFORK_DSIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_IFORK_BOFF(ip) : \ + XFS_LITINO((ip)->i_mount)) +#define XFS_IFORK_ASIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ + 0) +#define XFS_IFORK_SIZE(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + XFS_IFORK_DSIZE(ip) : \ + XFS_IFORK_ASIZE(ip)) +#define XFS_IFORK_FORMAT(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_format : \ + (ip)->i_d.di_aformat) +#define XFS_IFORK_FMT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_format = (n)) : \ + ((ip)->i_d.di_aformat = (n))) +#define XFS_IFORK_NEXTENTS(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_nextents : \ + (ip)->i_d.di_anextents) +#define XFS_IFORK_NEXT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? 
\ + ((ip)->i_d.di_nextents = (n)) : \ + ((ip)->i_d.di_anextents = (n))) #ifdef __KERNEL__ @@ -509,7 +493,6 @@ void xfs_ihash_init(struct xfs_mount *); void xfs_ihash_free(struct xfs_mount *); xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, struct xfs_trans *); -void xfs_inode_lock_init(xfs_inode_t *, bhv_vnode_t *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, uint, uint, xfs_inode_t **, xfs_daddr_t); void xfs_iput(xfs_inode_t *, uint); @@ -545,7 +528,7 @@ void xfs_dinode_to_disk(struct xfs_dinode_core *, struct xfs_icdinode *); uint xfs_ip2xflags(struct xfs_inode *); -uint xfs_dic2xflags(struct xfs_dinode_core *); +uint xfs_dic2xflags(struct xfs_dinode *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); @@ -567,13 +550,12 @@ void xfs_iunpin(xfs_inode_t *); int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); int xfs_iflush(xfs_inode_t *, uint); void xfs_iflush_all(struct xfs_mount *); -int xfs_iaccess(xfs_inode_t *, mode_t, cred_t *); -uint xfs_iroundup(uint); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, int, uint); void xfs_synchronize_atime(xfs_inode_t *); +void xfs_mark_inode_dirty_sync(xfs_inode_t *); xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 565d470a6b4a..034ca7202295 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -274,6 +274,11 @@ xfs_inode_item_format( */ xfs_synchronize_atime(ip); + /* + * make sure the linux inode is dirty + */ + xfs_mark_inode_dirty_sync(ip); + vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(xfs_dinode_core_t); XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); @@ -615,7 +620,7 @@ xfs_inode_item_trylock( return XFS_ITEM_PUSHBUF; } else { /* - * We 
hold the AIL_LOCK, so we must specify the + * We hold the AIL lock, so we must specify the * NONOTIFY flag so that we won't double trip. */ xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); @@ -749,7 +754,7 @@ xfs_inode_item_committed( * marked delayed write. If that's the case, we'll initiate a bawrite on that * buffer to expedite the process. * - * We aren't holding the AIL_LOCK (or the flush lock) when this gets called, + * We aren't holding the AIL lock (or the flush lock) when this gets called, * so it is inherently race-y. */ STATIC void @@ -792,7 +797,7 @@ xfs_inode_item_pushbuf( if (XFS_BUF_ISDELAYWRITE(bp)) { /* * We were racing with iflush because we don't hold - * the AIL_LOCK or the flush lock. However, at this point, + * the AIL lock or the flush lock. However, at this point, * we have the buffer, and we know that it's dirty. * So, it's possible that iflush raced with us, and * this item is already taken off the AIL. @@ -968,7 +973,6 @@ xfs_iflush_done( xfs_inode_log_item_t *iip) { xfs_inode_t *ip; - SPLDECL(s); ip = iip->ili_inode; @@ -983,15 +987,15 @@ xfs_iflush_done( */ if (iip->ili_logged && (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { - AIL_LOCK(ip->i_mount, s); + spin_lock(&ip->i_mount->m_ail_lock); if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { /* * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(ip->i_mount, - (xfs_log_item_t*)iip, s); + (xfs_log_item_t*)iip); } else { - AIL_UNLOCK(ip->i_mount, s); + spin_unlock(&ip->i_mount->m_ail_lock); } } @@ -1025,21 +1029,19 @@ xfs_iflush_abort( { xfs_inode_log_item_t *iip; xfs_mount_t *mp; - SPLDECL(s); iip = ip->i_itemp; mp = ip->i_mount; if (iip) { if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { /* * xfs_trans_delete_ail() drops the AIL lock. 
*/ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip, - s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); } else - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c deleted file mode 100644 index b27b5d5be841..000000000000 --- a/fs/xfs/xfs_iocore.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dfrag.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_itable.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" -#include "xfs_bmap.h" -#include "xfs_error.h" -#include "xfs_rw.h" -#include "xfs_quota.h" -#include "xfs_trans_space.h" -#include "xfs_iomap.h" - - -STATIC xfs_fsize_t -xfs_size_fn( - xfs_inode_t *ip) -{ - return XFS_ISIZE(ip); -} - -STATIC int -xfs_ioinit( - struct xfs_mount *mp, 
- struct xfs_mount_args *mntargs, - int flags) -{ - return xfs_mountfs(mp, flags); -} - -xfs_ioops_t xfs_iocore_xfs = { - .xfs_ioinit = (xfs_ioinit_t) xfs_ioinit, - .xfs_bmapi_func = (xfs_bmapi_t) xfs_bmapi, - .xfs_bunmapi_func = (xfs_bunmapi_t) xfs_bunmapi, - .xfs_bmap_eof_func = (xfs_bmap_eof_t) xfs_bmap_eof, - .xfs_iomap_write_direct = - (xfs_iomap_write_direct_t) xfs_iomap_write_direct, - .xfs_iomap_write_delay = - (xfs_iomap_write_delay_t) xfs_iomap_write_delay, - .xfs_iomap_write_allocate = - (xfs_iomap_write_allocate_t) xfs_iomap_write_allocate, - .xfs_iomap_write_unwritten = - (xfs_iomap_write_unwritten_t) xfs_iomap_write_unwritten, - .xfs_ilock = (xfs_lock_t) xfs_ilock, - .xfs_lck_map_shared = (xfs_lck_map_shared_t) xfs_ilock_map_shared, - .xfs_ilock_demote = (xfs_lock_demote_t) xfs_ilock_demote, - .xfs_ilock_nowait = (xfs_lock_nowait_t) xfs_ilock_nowait, - .xfs_unlock = (xfs_unlk_t) xfs_iunlock, - .xfs_size_func = (xfs_size_t) xfs_size_fn, - .xfs_iodone = (xfs_iodone_t) fs_noerr, - .xfs_swap_extents_func = (xfs_swap_extents_t) xfs_swap_extents, -}; - -void -xfs_iocore_inode_reinit( - xfs_inode_t *ip) -{ - xfs_iocore_t *io = &ip->i_iocore; - - io->io_flags = 0; - if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) - io->io_flags |= XFS_IOCORE_RT; - io->io_dmevmask = ip->i_d.di_dmevmask; - io->io_dmstate = ip->i_d.di_dmstate; -} - -void -xfs_iocore_inode_init( - xfs_inode_t *ip) -{ - xfs_iocore_t *io = &ip->i_iocore; - xfs_mount_t *mp = ip->i_mount; - - io->io_mount = mp; -#ifdef DEBUG - io->io_lock = &ip->i_lock; - io->io_iolock = &ip->i_iolock; -#endif - - io->io_obj = (void *)ip; - - xfs_iocore_inode_reinit(ip); -} diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 72786e356d56..fde37f87d52f 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -53,12 +53,10 @@ void xfs_iomap_enter_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, ssize_t count) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (!ip->i_rwtrace) return; @@ -70,8 
+68,8 @@ xfs_iomap_enter_trace( (void *)((unsigned long)((offset >> 32) & 0xffffffff)), (void *)((unsigned long)(offset & 0xffffffff)), (void *)((unsigned long)count), - (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(io->io_new_size & 0xffffffff)), + (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, @@ -84,15 +82,13 @@ xfs_iomap_enter_trace( void xfs_iomap_map_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, ssize_t count, xfs_iomap_t *iomapp, xfs_bmbt_irec_t *imapp, int flags) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (!ip->i_rwtrace) return; @@ -126,7 +122,7 @@ xfs_iomap_map_trace( STATIC int xfs_imap_to_bmap( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, xfs_bmbt_irec_t *imap, xfs_iomap_t *iomapp, @@ -134,11 +130,10 @@ xfs_imap_to_bmap( int iomaps, /* Number of iomap entries */ int flags) { - xfs_mount_t *mp; + xfs_mount_t *mp = ip->i_mount; int pbm; xfs_fsblock_t start_block; - mp = io->io_mount; for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) { iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); @@ -146,7 +141,7 @@ xfs_imap_to_bmap( iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); iomapp->iomap_flags = flags; - if (io->io_flags & XFS_IOCORE_RT) { + if (XFS_IS_REALTIME_INODE(ip)) { iomapp->iomap_flags |= IOMAP_REALTIME; iomapp->iomap_target = mp->m_rtdev_targp; } else { @@ -160,7 +155,7 @@ xfs_imap_to_bmap( iomapp->iomap_bn = IOMAP_DADDR_NULL; iomapp->iomap_flags |= IOMAP_DELAY; } else { - iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block); + iomapp->iomap_bn = XFS_FSB_TO_DB(ip, start_block); if (ISUNWRITTEN(imap)) iomapp->iomap_flags |= IOMAP_UNWRITTEN; } @@ -172,14 +167,14 @@ xfs_imap_to_bmap( int xfs_iomap( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, ssize_t count, int flags, 
xfs_iomap_t *iomapp, int *niomaps) { - xfs_mount_t *mp = io->io_mount; + xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int lockmode = 0; @@ -188,45 +183,37 @@ xfs_iomap( int bmapi_flags = 0; int iomap_flags = 0; + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - switch (flags & - (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE | - BMAPI_UNWRITTEN | BMAPI_DEVICE)) { + switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { case BMAPI_READ: - xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); - lockmode = XFS_LCK_MAP_SHARED(mp, io); + xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count); + lockmode = xfs_ilock_map_shared(ip); bmapi_flags = XFS_BMAPI_ENTIRE; break; case BMAPI_WRITE: - xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); + xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; if (flags & BMAPI_IGNSTATE) bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; - XFS_ILOCK(mp, io, lockmode); + xfs_ilock(ip, lockmode); break; case BMAPI_ALLOCATE: - xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, io, offset, count); + xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; bmapi_flags = XFS_BMAPI_ENTIRE; + /* Attempt non-blocking lock */ if (flags & BMAPI_TRYLOCK) { - if (!XFS_ILOCK_NOWAIT(mp, io, lockmode)) + if (!xfs_ilock_nowait(ip, lockmode)) return XFS_ERROR(EAGAIN); } else { - XFS_ILOCK(mp, io, lockmode); + xfs_ilock(ip, lockmode); } break; - case BMAPI_UNWRITTEN: - goto phase2; - case BMAPI_DEVICE: - lockmode = XFS_LCK_MAP_SHARED(mp, io); - iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ? 
- mp->m_rtdev_targp : mp->m_ddev_targp; - error = 0; - *niomaps = 1; - goto out; default: BUG(); } @@ -237,7 +224,7 @@ xfs_iomap( end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = XFS_BMAPI(mp, NULL, io, offset_fsb, + error = xfs_bmapi(NULL, ip, offset_fsb, (xfs_filblks_t)(end_fsb - offset_fsb), bmapi_flags, NULL, 0, &imap, &nimaps, NULL, NULL); @@ -245,54 +232,48 @@ xfs_iomap( if (error) goto out; -phase2: - switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) { + switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { case BMAPI_WRITE: /* If we found an extent, return it */ if (nimaps && (imap.br_startblock != HOLESTARTBLOCK) && (imap.br_startblock != DELAYSTARTBLOCK)) { - xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io, + xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, offset, count, iomapp, &imap, flags); break; } if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { - error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset, - count, flags, &imap, &nimaps, nimaps); + error = xfs_iomap_write_direct(ip, offset, count, flags, + &imap, &nimaps, nimaps); } else { - error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, - flags, &imap, &nimaps); + error = xfs_iomap_write_delay(ip, offset, count, flags, + &imap, &nimaps); } if (!error) { - xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, io, + xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, offset, count, iomapp, &imap, flags); } iomap_flags = IOMAP_NEW; break; case BMAPI_ALLOCATE: /* If we found an extent, return it */ - XFS_IUNLOCK(mp, io, lockmode); + xfs_iunlock(ip, lockmode); lockmode = 0; if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) { - xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io, + xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, offset, count, iomapp, &imap, flags); break; } - error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count, + error = xfs_iomap_write_allocate(ip, offset, count, &imap, &nimaps); break; - case BMAPI_UNWRITTEN: - lockmode = 0; - error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, 
offset, count); - nimaps = 0; - break; } if (nimaps) { - *niomaps = xfs_imap_to_bmap(io, offset, &imap, + *niomaps = xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, *niomaps, iomap_flags); } else if (niomaps) { *niomaps = 0; @@ -300,14 +281,15 @@ phase2: out: if (lockmode) - XFS_IUNLOCK(mp, io, lockmode); + xfs_iunlock(ip, lockmode); return XFS_ERROR(error); } + STATIC int xfs_iomap_eof_align_last_fsb( xfs_mount_t *mp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_fsize_t isize, xfs_extlen_t extsize, xfs_fileoff_t *last_fsb) @@ -316,7 +298,7 @@ xfs_iomap_eof_align_last_fsb( xfs_extlen_t align; int eof, error; - if (io->io_flags & XFS_IOCORE_RT) + if (XFS_IS_REALTIME_INODE(ip)) ; /* * If mounted with the "-o swalloc" option, roundup the allocation @@ -347,7 +329,7 @@ xfs_iomap_eof_align_last_fsb( } if (new_last_fsb) { - error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof); + error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); if (error) return error; if (eof) @@ -416,7 +398,6 @@ xfs_iomap_write_direct( int found) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb, resaligned; @@ -446,13 +427,13 @@ xfs_iomap_write_direct( extsz = xfs_get_extsz_hint(ip); isize = ip->i_size; - if (io->io_new_size > isize) - isize = io->io_new_size; + if (ip->i_new_size > isize) + isize = ip->i_new_size; offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > isize) { - error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, + error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, &last_fsb); if (error) goto error_out; @@ -519,7 +500,7 @@ xfs_iomap_write_direct( */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; - error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag, + error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list, NULL); if (error) 
goto error0; @@ -542,7 +523,8 @@ xfs_iomap_write_direct( goto error_out; } - if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) { + if (unlikely(!imap.br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) { error = xfs_cmn_err_fsblock_zero(ip, &imap); goto error_out; } @@ -577,7 +559,7 @@ error_out: STATIC int xfs_iomap_eof_want_preallocate( xfs_mount_t *mp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_fsize_t isize, xfs_off_t offset, size_t count, @@ -604,7 +586,7 @@ xfs_iomap_eof_want_preallocate( while (count_fsb > 0) { imaps = nimaps; firstblock = NULLFSBLOCK; - error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0, + error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, &firstblock, 0, imap, &imaps, NULL, NULL); if (error) return error; @@ -630,7 +612,6 @@ xfs_iomap_write_delay( int *nmaps) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_off_t aligned_offset; @@ -658,10 +639,10 @@ xfs_iomap_write_delay( retry: isize = ip->i_size; - if (io->io_new_size > isize) - isize = io->io_new_size; + if (ip->i_new_size > isize) + isize = ip->i_new_size; - error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count, + error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; @@ -675,7 +656,7 @@ retry: } if (prealloc || extsz) { - error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, + error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, &last_fsb); if (error) return error; @@ -683,7 +664,7 @@ retry: nimaps = XFS_WRITE_IMAPS; firstblock = NULLFSBLOCK; - error = XFS_BMAPI(mp, NULL, io, offset_fsb, + error = xfs_bmapi(NULL, ip, offset_fsb, (xfs_filblks_t)(last_fsb - offset_fsb), XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, @@ -697,7 +678,7 @@ retry: */ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, - io, offset, count); + ip, offset, count); 
if (xfs_flush_space(ip, &fsynced, &ioflag)) return XFS_ERROR(ENOSPC); @@ -705,7 +686,8 @@ retry: goto retry; } - if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT))) + if (unlikely(!imap[0].br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) return xfs_cmn_err_fsblock_zero(ip, &imap[0]); *ret_imap = imap[0]; @@ -720,6 +702,9 @@ retry: * the originating callers request. * * Called without a lock on the inode. + * + * We no longer bother to look at the incoming map - all we have to + * guarantee is that whatever we allocate fills the required range. */ int xfs_iomap_write_allocate( @@ -730,15 +715,14 @@ xfs_iomap_write_allocate( int *retmap) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; - xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; + xfs_bmbt_irec_t imap; xfs_trans_t *tp; - int i, nimaps, committed; + int nimaps, committed; int error = 0; int nres; @@ -785,13 +769,38 @@ xfs_iomap_write_allocate( XFS_BMAP_INIT(&free_list, &first_block); - nimaps = XFS_STRAT_WRITE_IMAPS; /* - * Ensure we don't go beyond eof - it is possible - * the extents changed since we did the read call, - * we dropped the ilock in the interim. + * it is possible that the extents have changed since + * we did the read call as we dropped the ilock for a + * while. We have to be careful about truncates or hole + * punchs here - we are not allowed to allocate + * non-delalloc blocks here. + * + * The only protection against truncation is the pages + * for the range we are being asked to convert are + * locked and hence a truncate will block on them + * first. + * + * As a result, if we go beyond the range we really + * need and hit an delalloc extent boundary followed by + * a hole while we have excess blocks in the map, we + * will fill the hole incorrectly and overrun the + * transaction reservation. 
+ * + * Using a single map prevents this as we are forced to + * check each map we look for overlap with the desired + * range and abort as soon as we find it. Also, given + * that we only return a single map, having one beyond + * what we can return is probably a bit silly. + * + * We also need to check that we don't go beyond EOF; + * this is a truncate optimisation as a truncate sets + * the new file size before block on the pages we + * currently have locked under writeback. Because they + * are about to be tossed, we don't need to write them + * back.... */ - + nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, ip->i_size); xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); @@ -805,9 +814,9 @@ xfs_iomap_write_allocate( } /* Go get the actual blocks */ - error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb, + error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, - imap, &nimaps, &free_list, NULL); + &imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; @@ -826,27 +835,24 @@ xfs_iomap_write_allocate( * See if we were able to allocate an extent that * covers at least part of the callers request */ - for (i = 0; i < nimaps; i++) { - if (unlikely(!imap[i].br_startblock && - !(io->io_flags & XFS_IOCORE_RT))) - return xfs_cmn_err_fsblock_zero(ip, &imap[i]); - if ((offset_fsb >= imap[i].br_startoff) && - (offset_fsb < (imap[i].br_startoff + - imap[i].br_blockcount))) { - *map = imap[i]; - *retmap = 1; - XFS_STATS_INC(xs_xstrat_quick); - return 0; - } - count_fsb -= imap[i].br_blockcount; + if (unlikely(!imap.br_startblock && + XFS_IS_REALTIME_INODE(ip))) + return xfs_cmn_err_fsblock_zero(ip, &imap); + if ((offset_fsb >= imap.br_startoff) && + (offset_fsb < (imap.br_startoff + + imap.br_blockcount))) { + *map = imap; + *retmap = 1; + XFS_STATS_INC(xs_xstrat_quick); + return 0; } - /* So far we have not mapped the requested part of the + /* + * So far we have not mapped the requested part of the * file, just surrounding data, 
try again. */ - nimaps--; - map_start_fsb = imap[nimaps].br_startoff + - imap[nimaps].br_blockcount; + count_fsb -= imap.br_blockcount; + map_start_fsb = imap.br_startoff + imap.br_blockcount; } trans_cancel: @@ -864,7 +870,6 @@ xfs_iomap_write_unwritten( size_t count) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; @@ -877,8 +882,7 @@ xfs_iomap_write_unwritten( int committed; int error; - xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, - &ip->i_iocore, offset, count); + xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); @@ -912,7 +916,7 @@ xfs_iomap_write_unwritten( */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; - error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, + error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 1, &imap, &nimaps, &free_list, NULL); if (error) @@ -928,7 +932,7 @@ xfs_iomap_write_unwritten( return XFS_ERROR(error); if (unlikely(!imap.br_startblock && - !(io->io_flags & XFS_IOCORE_RT))) + !(XFS_IS_REALTIME_INODE(ip)))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index f5c09887fe93..ee1a0c134cc2 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -36,14 +36,12 @@ typedef enum { BMAPI_READ = (1 << 0), /* read extents */ BMAPI_WRITE = (1 << 1), /* create extents */ BMAPI_ALLOCATE = (1 << 2), /* delayed allocate to real extents */ - BMAPI_UNWRITTEN = (1 << 3), /* unwritten extents to real extents */ /* modifiers */ BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ BMAPI_TRYLOCK = (1 << 8), /* 
non-blocking request */ - BMAPI_DEVICE = (1 << 9), /* we only want to know the device */ } bmapi_flags_t; @@ -73,11 +71,10 @@ typedef struct xfs_iomap { iomap_flags_t iomap_flags; } xfs_iomap_t; -struct xfs_iocore; struct xfs_inode; struct xfs_bmbt_irec; -extern int xfs_iomap(struct xfs_iocore *, xfs_off_t, ssize_t, int, +extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, struct xfs_iomap *, int *); extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, int, struct xfs_bmbt_irec *, int *, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9fc4c2886529..658aab6b1bbf 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -170,7 +170,7 @@ xfs_bulkstat_one_dinode( buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); - buf->bs_xflags = xfs_dic2xflags(dic); + buf->bs_xflags = xfs_dic2xflags(dip); buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; buf->bs_extents = be32_to_cpu(dic->di_nextents); buf->bs_gen = be32_to_cpu(dic->di_gen); @@ -291,7 +291,7 @@ xfs_bulkstat_use_dinode( dip = (xfs_dinode_t *) xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); /* - * Check the buffer containing the on-disk inode for di_nlink == 0. + * Check the buffer containing the on-disk inode for di_mode == 0. * This is to prevent xfs_bulkstat from picking up just reclaimed * inodes that have their in-core state initialized but not flushed * to disk yet. 
This is a temporary hack that would require a proper @@ -299,7 +299,7 @@ xfs_bulkstat_use_dinode( */ if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC || !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) || - !dip->di_core.di_nlink) + !dip->di_core.di_mode) return 0; if (flags & BULKSTAT_FG_QUICK) { *dipp = dip; @@ -307,7 +307,7 @@ xfs_bulkstat_use_dinode( } /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ aformat = dip->di_core.di_aformat; - if ((XFS_CFORK_Q(&dip->di_core) == 0) || + if ((XFS_DFORK_Q(dip) == 0) || (aformat == XFS_DINODE_FMT_LOCAL) || (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { *dipp = dip; @@ -399,7 +399,7 @@ xfs_bulkstat( (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; - irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4, + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, KM_SLEEP | KM_MAYFAIL | KM_LARGE); nirbuf = irbsize / sizeof(*irbuf); @@ -830,7 +830,7 @@ xfs_inumbers( agino = XFS_INO_TO_AGINO(mp, ino); left = *count; *count = 0; - bcount = MIN(left, (int)(NBPP / sizeof(*buffer))); + bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); error = bufidx = 0; cur = NULL; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 77c12715a7d0..a75edca1860f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -399,10 +399,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ { xlog_t *log = mp->m_log; xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; - int abortflg, spl; + int abortflg; cb->cb_next = NULL; - spl = LOG_LOCK(log); + spin_lock(&log->l_icloglock); abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); if (!abortflg) { ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || @@ -411,7 +411,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ *(iclog->ic_callback_tail) = cb; iclog->ic_callback_tail = 
&(cb->cb_next); } - LOG_UNLOCK(log, spl); + spin_unlock(&log->l_icloglock); return abortflg; } /* xfs_log_notify */ @@ -498,11 +498,14 @@ xfs_log_reserve(xfs_mount_t *mp, * Return error or zero. */ int -xfs_log_mount(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks) +xfs_log_mount( + xfs_mount_t *mp, + xfs_buftarg_t *log_target, + xfs_daddr_t blk_offset, + int num_bblks) { + int error; + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); else { @@ -515,11 +518,21 @@ xfs_log_mount(xfs_mount_t *mp, mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); /* + * Initialize the AIL now we have a log. + */ + spin_lock_init(&mp->m_ail_lock); + error = xfs_trans_ail_init(mp); + if (error) { + cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); + goto error; + } + + /* * skip log recovery on a norecovery mount. pretend it all * just worked. */ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - int error, readonly = (mp->m_flags & XFS_MOUNT_RDONLY); + int readonly = (mp->m_flags & XFS_MOUNT_RDONLY); if (readonly) mp->m_flags &= ~XFS_MOUNT_RDONLY; @@ -530,8 +543,7 @@ xfs_log_mount(xfs_mount_t *mp, mp->m_flags |= XFS_MOUNT_RDONLY; if (error) { cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); - xlog_dealloc_log(mp->m_log); - return error; + goto error; } } @@ -540,6 +552,9 @@ xfs_log_mount(xfs_mount_t *mp, /* End mounting message in xfs_log_mount_finish */ return 0; +error: + xfs_log_unmount_dealloc(mp); + return error; } /* xfs_log_mount */ /* @@ -606,7 +621,6 @@ xfs_log_unmount_write(xfs_mount_t *mp) xfs_log_ticket_t tic = NULL; xfs_lsn_t lsn; int error; - SPLDECL(s); /* the data section must be 32 bit size aligned */ struct { @@ -659,24 +673,24 @@ xfs_log_unmount_write(xfs_mount_t *mp) } - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; iclog->ic_refcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); 
xlog_state_want_sync(log, iclog); (void) xlog_state_release_iclog(log, iclog); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { sv_wait(&iclog->ic_forcesema, PMEM, &log->l_icloglock, s); } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } if (tic) { xlog_trace_loggrant(log, tic, "unmount rec"); @@ -697,15 +711,15 @@ xfs_log_unmount_write(xfs_mount_t *mp) * a file system that went into forced_shutdown as * the result of an unmount.. */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; iclog->ic_refcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); (void) xlog_state_release_iclog(log, iclog); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY @@ -714,7 +728,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) sv_wait(&iclog->ic_forcesema, PMEM, &log->l_icloglock, s); } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } } @@ -723,10 +737,14 @@ xfs_log_unmount_write(xfs_mount_t *mp) /* * Deallocate log structures for unmount/relocation. + * + * We need to stop the aild from running before we destroy + * and deallocate the log as the aild references the log. 
*/ void xfs_log_unmount_dealloc(xfs_mount_t *mp) { + xfs_trans_ail_destroy(mp); xlog_dealloc_log(mp->m_log); } @@ -762,20 +780,18 @@ xfs_log_move_tail(xfs_mount_t *mp, xlog_ticket_t *tic; xlog_t *log = mp->m_log; int need_bytes, free_bytes, cycle, bytes; - SPLDECL(s); if (XLOG_FORCED_SHUTDOWN(log)) return; - ASSERT(!XFS_FORCED_SHUTDOWN(mp)); if (tail_lsn == 0) { /* needed since sync_lsn is 64 bits */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); tail_lsn = log->l_last_sync_lsn; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); /* Also an invalid lsn. 1 implies that we aren't passing in a valid * tail_lsn. @@ -824,7 +840,7 @@ xfs_log_move_tail(xfs_mount_t *mp, tic = tic->t_next; } while (tic != log->l_reserve_headq); } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); } /* xfs_log_move_tail */ /* @@ -836,14 +852,13 @@ xfs_log_move_tail(xfs_mount_t *mp, int xfs_log_need_covered(xfs_mount_t *mp) { - SPLDECL(s); int needed = 0, gen; xlog_t *log = mp->m_log; if (!xfs_fs_writable(mp)) return 0; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) && !xfs_trans_first_ail(mp, &gen) @@ -856,7 +871,7 @@ xfs_log_need_covered(xfs_mount_t *mp) } needed = 1; } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return needed; } @@ -881,17 +896,16 @@ xfs_lsn_t xlog_assign_tail_lsn(xfs_mount_t *mp) { xfs_lsn_t tail_lsn; - SPLDECL(s); xlog_t *log = mp->m_log; tail_lsn = xfs_trans_tail_ail(mp); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); if (tail_lsn != 0) { log->l_tail_lsn = tail_lsn; } else { tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return tail_lsn; } /* xlog_assign_tail_lsn */ @@ -911,7 +925,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) * the tail. 
The details of this case are described below, but the end * result is that we return the size of the log as the amount of space left. */ -int +STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes) { int free_bytes; @@ -1165,7 +1179,7 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_flags |= XLOG_ACTIVE_RECOVERY; log->l_prev_block = -1; - ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, 1, 0); + log->l_tail_lsn = xlog_assign_lsn(1, 0); /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ @@ -1193,8 +1207,8 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); log->l_xbuf = bp; - spinlock_init(&log->l_icloglock, "iclog"); - spinlock_init(&log->l_grant_lock, "grhead_iclog"); + spin_lock_init(&log->l_icloglock); + spin_lock_init(&log->l_grant_lock); initnsema(&log->l_flushsema, 0, "ic-flush"); xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ @@ -1231,12 +1245,12 @@ xlog_alloc_log(xfs_mount_t *mp, head = &iclog->ic_header; memset(head, 0, sizeof(xlog_rec_header_t)); - INT_SET(head->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); - INT_SET(head->h_version, ARCH_CONVERT, + head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); + head->h_version = cpu_to_be32( XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); - INT_SET(head->h_size, ARCH_CONVERT, log->l_iclog_size); + head->h_size = cpu_to_be32(log->l_iclog_size); /* new fields */ - INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); + head->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); @@ -1293,7 +1307,7 @@ xlog_commit_record(xfs_mount_t *mp, * pushes on an lsn which is further along in the log once we reach the high * water mark. In this manner, we would be creating a low water mark. 
*/ -void +STATIC void xlog_grant_push_ail(xfs_mount_t *mp, int need_bytes) { @@ -1305,11 +1319,10 @@ xlog_grant_push_ail(xfs_mount_t *mp, int threshold_block; /* block in lsn we'd like to be at */ int threshold_cycle; /* lsn cycle we'd like to be at */ int free_threshold; - SPLDECL(s); ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, log->l_grant_reserve_bytes); @@ -1331,8 +1344,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, threshold_block -= log->l_logBBsize; threshold_cycle += 1; } - ASSIGN_ANY_LSN_HOST(threshold_lsn, threshold_cycle, - threshold_block); + threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); /* Don't pass in an lsn greater than the lsn of the last * log record known to be on disk. @@ -1340,7 +1352,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) threshold_lsn = log->l_last_sync_lsn; } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); /* * Get the transaction layer to kick the dirty buffers out to @@ -1378,19 +1390,18 @@ xlog_grant_push_ail(xfs_mount_t *mp, * is added immediately before calling bwrite(). 
*/ -int +STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog) { xfs_caddr_t dptr; /* pointer to byte sized element */ xfs_buf_t *bp; - int i, ops; + int i; uint count; /* byte count of bwrite */ uint count_init; /* initial count before roundup */ int roundoff; /* roundoff to BB or stripe */ int split = 0; /* split write into two regions */ int error; - SPLDECL(s); int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb); XFS_STATS_INC(xs_log_writes); @@ -1415,30 +1426,26 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, roundoff); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); /* real byte length */ if (v2) { - INT_SET(iclog->ic_header.h_len, - ARCH_CONVERT, - iclog->ic_offset + roundoff); + iclog->ic_header.h_len = + cpu_to_be32(iclog->ic_offset + roundoff); } else { - INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset); + iclog->ic_header.h_len = + cpu_to_be32(iclog->ic_offset); } - /* put ops count in correct order */ - ops = iclog->ic_header.h_num_logops; - INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); - bp = iclog->ic_bp; ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); - XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); + XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1501,10 +1508,10 @@ xlog_sync(xlog_t *log, * a new cycle. Watch out for the header magic number * case, though. 
*/ - for (i=0; i<split; i += BBSIZE) { - INT_MOD(*(uint *)dptr, ARCH_CONVERT, +1); - if (INT_GET(*(uint *)dptr, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) - INT_MOD(*(uint *)dptr, ARCH_CONVERT, +1); + for (i = 0; i < split; i += BBSIZE) { + be32_add_cpu((__be32 *)dptr, 1); + if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM) + be32_add_cpu((__be32 *)dptr, 1); dptr += BBSIZE; } @@ -1527,14 +1534,13 @@ xlog_sync(xlog_t *log, /* * Deallocate a log structure */ -void +STATIC void xlog_dealloc_log(xlog_t *log) { xlog_in_core_t *iclog, *next_iclog; xlog_ticket_t *tic, *next_tic; int i; - iclog = log->l_iclog; for (i=0; i<log->l_iclog_bufs; i++) { sv_destroy(&iclog->ic_forcesema); @@ -1565,7 +1571,7 @@ xlog_dealloc_log(xlog_t *log) tic = log->l_unmount_free; while (tic) { next_tic = tic->t_next; - kmem_free(tic, NBPP); + kmem_free(tic, PAGE_SIZE); tic = next_tic; } } @@ -1592,14 +1598,12 @@ xlog_state_finish_copy(xlog_t *log, int record_cnt, int copy_bytes) { - SPLDECL(s); + spin_lock(&log->l_icloglock); - s = LOG_LOCK(log); - - iclog->ic_header.h_num_logops += record_cnt; + be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt); iclog->ic_offset += copy_bytes; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_finish_copy */ @@ -1752,7 +1756,7 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) * we don't update ic_offset until the end when we know exactly how many * bytes have been written out. */ -int +STATIC int xlog_write(xfs_mount_t * mp, xfs_log_iovec_t reg[], int nentries, @@ -1823,7 +1827,7 @@ xlog_write(xfs_mount_t * mp, /* start_lsn is the first lsn written to. That's all we need. */ if (! *start_lsn) - *start_lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); + *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); /* This loop writes out as many regions as can fit in the amount * of space which was allocated by xlog_state_get_iclog_space(). 
@@ -1839,7 +1843,7 @@ xlog_write(xfs_mount_t * mp, */ if (ticket->t_flags & XLOG_TIC_INITED) { logop_head = (xlog_op_header_t *)ptr; - INT_SET(logop_head->oh_tid, ARCH_CONVERT, ticket->t_tid); + logop_head->oh_tid = cpu_to_be32(ticket->t_tid); logop_head->oh_clientid = ticket->t_clientid; logop_head->oh_len = 0; logop_head->oh_flags = XLOG_START_TRANS; @@ -1853,7 +1857,7 @@ xlog_write(xfs_mount_t * mp, /* Copy log operation header directly into data section */ logop_head = (xlog_op_header_t *)ptr; - INT_SET(logop_head->oh_tid, ARCH_CONVERT, ticket->t_tid); + logop_head->oh_tid = cpu_to_be32(ticket->t_tid); logop_head->oh_clientid = ticket->t_clientid; logop_head->oh_res2 = 0; @@ -1888,13 +1892,14 @@ xlog_write(xfs_mount_t * mp, copy_off = partial_copy_len; if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ - INT_SET(logop_head->oh_len, ARCH_CONVERT, copy_len = need_copy); + copy_len = need_copy; + logop_head->oh_len = cpu_to_be32(copy_len); if (partial_copy) logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); partial_copy_len = partial_copy = 0; } else { /* partial write */ copy_len = iclog->ic_size - log_offset; - INT_SET(logop_head->oh_len, ARCH_CONVERT, copy_len); + logop_head->oh_len = cpu_to_be32(copy_len); logop_head->oh_flags |= XLOG_CONTINUE_TRANS; if (partial_copy) logop_head->oh_flags |= XLOG_WAS_CONT_TRANS; @@ -1992,7 +1997,8 @@ xlog_state_clean_log(xlog_t *log) * We don't need to cover the dummy. 
*/ if (!changed && - (INT_GET(iclog->ic_header.h_num_logops, ARCH_CONVERT) == XLOG_COVER_OPS)) { + (be32_to_cpu(iclog->ic_header.h_num_logops) == + XLOG_COVER_OPS)) { changed = 1; } else { /* @@ -2060,7 +2066,7 @@ xlog_get_lowest_lsn( lowest_lsn = 0; do { if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { - lsn = INT_GET(lsn_log->ic_header.h_lsn, ARCH_CONVERT); + lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); if ((lsn && !lowest_lsn) || (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { lowest_lsn = lsn; @@ -2089,9 +2095,8 @@ xlog_state_do_callback( int funcdidcallbacks; /* flag: function did callbacks */ int repeats; /* for issuing console warnings if * looping too many times */ - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); first_iclog = iclog = log->l_iclog; ioerrors = 0; funcdidcallbacks = 0; @@ -2136,7 +2141,7 @@ xlog_state_do_callback( * to DO_CALLBACK, we will not process it when * we retry since a previous iclog is in the * CALLBACK and the state cannot change since - * we are holding the LOG_LOCK. + * we are holding the l_icloglock. */ if (!(iclog->ic_state & (XLOG_STATE_DONE_SYNC | @@ -2162,11 +2167,9 @@ xlog_state_do_callback( */ lowest_lsn = xlog_get_lowest_lsn(log); - if (lowest_lsn && ( - XFS_LSN_CMP( - lowest_lsn, - INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) - )<0)) { + if (lowest_lsn && + XFS_LSN_CMP(lowest_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { iclog = iclog->ic_next; continue; /* Leave this iclog for * another thread */ @@ -2174,19 +2177,18 @@ xlog_state_do_callback( iclog->ic_state = XLOG_STATE_CALLBACK; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* l_last_sync_lsn field protected by - * GRANT_LOCK. Don't worry about iclog's lsn. + * l_grant_lock. Don't worry about iclog's lsn. * No one else can be here except us. 
*/ - s = GRANT_LOCK(log); - ASSERT(XFS_LSN_CMP( - log->l_last_sync_lsn, - INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) - )<=0); - log->l_last_sync_lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); - GRANT_UNLOCK(log, s); + spin_lock(&log->l_grant_lock); + ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); + log->l_last_sync_lsn = + be64_to_cpu(iclog->ic_header.h_lsn); + spin_unlock(&log->l_grant_lock); /* * Keep processing entries in the callback list @@ -2195,7 +2197,7 @@ xlog_state_do_callback( * empty and change the state to DIRTY so that * we don't miss any more callbacks being added. */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); } else { ioerrors++; } @@ -2204,14 +2206,14 @@ xlog_state_do_callback( while (cb) { iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback = NULL; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* perform callbacks in the order given */ for (; cb; cb = cb_next) { cb_next = cb->cb_next; cb->cb_func(cb->cb_arg, aborted); } - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); cb = iclog->ic_callback; } @@ -2258,7 +2260,7 @@ xlog_state_do_callback( * * SYNCING - i/o completion will go through logs * DONE_SYNC - interrupt thread should be waiting for - * LOG_LOCK + * l_icloglock * IOERROR - give up hope all ye who enter here */ if (iclog->ic_state == XLOG_STATE_WANT_SYNC || @@ -2276,7 +2278,7 @@ xlog_state_do_callback( flushcnt = log->l_flushcnt; log->l_flushcnt = 0; } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); while (flushcnt--) vsema(&log->l_flushsema); } /* xlog_state_do_callback */ @@ -2296,15 +2298,14 @@ xlog_state_do_callback( * global state machine log lock. Assume that the calls to cvsema won't * take a long time. At least we know it won't sleep. 
*/ -void +STATIC void xlog_state_done_syncing( xlog_in_core_t *iclog, int aborted) { xlog_t *log = iclog->ic_log; - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || iclog->ic_state == XLOG_STATE_IOERROR); @@ -2320,7 +2321,7 @@ xlog_state_done_syncing( */ if (iclog->ic_state != XLOG_STATE_IOERROR) { if (--iclog->ic_bwritecnt == 1) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return; } iclog->ic_state = XLOG_STATE_DONE_SYNC; @@ -2332,7 +2333,7 @@ xlog_state_done_syncing( * I/O, the others get to wait for the result. */ sv_broadcast(&iclog->ic_writesema); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2357,7 +2358,7 @@ xlog_state_done_syncing( * needs to be incremented, depending on the amount of data which * is copied. */ -int +STATIC int xlog_state_get_iclog_space(xlog_t *log, int len, xlog_in_core_t **iclogp, @@ -2365,23 +2366,22 @@ xlog_state_get_iclog_space(xlog_t *log, int *continued_write, int *logoffsetp) { - SPLDECL(s); int log_offset; xlog_rec_header_t *head; xlog_in_core_t *iclog; int error; restart: - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (XLOG_FORCED_SHUTDOWN(log)) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } iclog = log->l_iclog; if (! 
(iclog->ic_state == XLOG_STATE_ACTIVE)) { log->l_flushcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); XFS_STATS_INC(xs_log_noiclogs); /* Ensure that log writes happen */ @@ -2404,8 +2404,9 @@ restart: xlog_tic_add_region(ticket, log->l_iclog_hsize, XLOG_REG_TYPE_LRHEADER); - INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); - ASSIGN_LSN(head->h_lsn, log); + head->h_cycle = cpu_to_be32(log->l_curr_cycle); + head->h_lsn = cpu_to_be64( + xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block)); ASSERT(log->l_curr_block >= 0); } @@ -2423,12 +2424,12 @@ restart: /* If I'm the only one writing to this iclog, sync it to disk */ if (iclog->ic_refcnt == 1) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if ((error = xlog_state_release_iclog(log, iclog))) return error; } else { iclog->ic_refcnt--; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } goto restart; } @@ -2449,7 +2450,7 @@ restart: *iclogp = iclog; ASSERT(iclog->ic_offset <= iclog->ic_size); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); *logoffsetp = log_offset; return 0; @@ -2467,7 +2468,6 @@ xlog_grant_log_space(xlog_t *log, { int free_bytes; int need_bytes; - SPLDECL(s); #ifdef DEBUG xfs_lsn_t tail_lsn; #endif @@ -2479,7 +2479,7 @@ xlog_grant_log_space(xlog_t *log, #endif /* Is there space or do we need to sleep? 
*/ - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); /* something is already sleeping; insert new transaction at end */ @@ -2502,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log, */ xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 1"); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); } if (tic->t_flags & XFS_LOG_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_ocnt; @@ -2524,14 +2524,14 @@ redo: sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 2"); xlog_grant_push_ail(log->l_mp, need_bytes); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2553,7 +2553,7 @@ redo: #endif xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return 0; error_return: @@ -2567,7 +2567,7 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. 
*/ - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_grant_log_space */ @@ -2581,7 +2581,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log, xlog_ticket_t *tic) { - SPLDECL(s); int free_bytes, need_bytes; xlog_ticket_t *ntic; #ifdef DEBUG @@ -2599,7 +2598,7 @@ xlog_regrant_write_log_space(xlog_t *log, panic("regrant Recovery problem"); #endif - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); if (XLOG_FORCED_SHUTDOWN(log)) @@ -2638,14 +2637,14 @@ xlog_regrant_write_log_space(xlog_t *log, /* If we're shutting down, this tic is already * off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); xlog_grant_push_ail(log->l_mp, tic->t_unit_res); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); } } @@ -2665,14 +2664,14 @@ redo: /* If we're shutting down, this tic is already off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); xlog_grant_push_ail(log->l_mp, need_bytes); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); @@ -2689,7 +2688,7 @@ redo: xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return 0; @@ -2704,7 +2703,7 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. 
*/ - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_regrant_write_log_space */ @@ -2720,14 +2719,12 @@ STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket) { - SPLDECL(s); - xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: enter"); if (ticket->t_cnt > 0) ticket->t_cnt--; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_grant_sub_space(log, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); @@ -2737,7 +2734,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, /* just return if we still have some of the pre-reserved space */ if (ticket->t_cnt > 0) { - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return; } @@ -2745,7 +2742,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: exit"); xlog_verify_grant_head(log, 0); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); } /* xlog_regrant_reserve_log_space */ @@ -2769,12 +2766,10 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, xlog_ticket_t *ticket) { - SPLDECL(s); - if (ticket->t_cnt > 0) ticket->t_cnt--; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); xlog_grant_sub_space(log, ticket->t_curr_res); @@ -2791,7 +2786,7 @@ xlog_ungrant_log_space(xlog_t *log, xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); xfs_log_move_tail(log->l_mp, 1); } /* xlog_ungrant_log_space */ @@ -2799,15 +2794,13 @@ xlog_ungrant_log_space(xlog_t *log, /* * Atomically put back used ticket. 
*/ -void +STATIC void xlog_state_put_ticket(xlog_t *log, xlog_ticket_t *tic) { - unsigned long s; - - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); xlog_ticket_put(log, tic); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_put_ticket */ /* @@ -2819,19 +2812,18 @@ xlog_state_put_ticket(xlog_t *log, * * */ -int +STATIC int xlog_state_release_iclog(xlog_t *log, xlog_in_core_t *iclog) { - SPLDECL(s); int sync = 0; /* do we sync? */ xlog_assign_tail_lsn(log->l_mp); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } @@ -2843,12 +2835,12 @@ xlog_state_release_iclog(xlog_t *log, iclog->ic_state == XLOG_STATE_WANT_SYNC) { sync++; iclog->ic_state = XLOG_STATE_SYNCING; - INT_SET(iclog->ic_header.h_tail_lsn, ARCH_CONVERT, log->l_tail_lsn); + iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); /* cycle incremented when incrementing curr_block */ } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* * We let the log lock go, so it's possible that we hit a log I/O @@ -2881,7 +2873,7 @@ xlog_state_switch_iclogs(xlog_t *log, if (!eventual_size) eventual_size = iclog->ic_offset; iclog->ic_state = XLOG_STATE_WANT_SYNC; - INT_SET(iclog->ic_header.h_prev_block, ARCH_CONVERT, log->l_prev_block); + iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block); log->l_prev_block = log->l_curr_block; log->l_prev_cycle = log->l_curr_cycle; @@ -2939,13 +2931,12 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) { xlog_in_core_t *iclog; xfs_lsn_t lsn; - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } @@ -2978,15 +2969,15 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) * the 
previous sync. */ iclog->ic_refcnt++; - lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); + lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); *log_flushed = 1; - s = LOG_LOCK(log); - if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn && + spin_lock(&log->l_icloglock); + if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && iclog->ic_state != XLOG_STATE_DIRTY) goto maybe_sleep; else @@ -3011,12 +3002,12 @@ maybe_sleep: if (flags & XFS_LOG_SYNC) { /* * We must check if we're shutting down here, before - * we wait, while we're holding the LOG_LOCK. + * we wait, while we're holding the l_icloglock. * Then we check again after waking up, in case our * sleep was disturbed by a bad news. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); @@ -3033,7 +3024,7 @@ maybe_sleep: } else { no_sleep: - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } return 0; } /* xlog_state_sync_all */ @@ -3051,7 +3042,7 @@ no_sleep: * If filesystem activity goes to zero, the iclog will get flushed only by * bdflush(). 
*/ -int +STATIC int xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags, @@ -3059,26 +3050,24 @@ xlog_state_sync(xlog_t *log, { xlog_in_core_t *iclog; int already_slept = 0; - SPLDECL(s); - try_again: - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } do { - if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) != lsn) { - iclog = iclog->ic_next; - continue; + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { + iclog = iclog->ic_next; + continue; } if (iclog->ic_state == XLOG_STATE_DIRTY) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return 0; } @@ -3113,11 +3102,11 @@ try_again: } else { iclog->ic_refcnt++; xlog_state_switch_iclogs(log, iclog, 0); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); *log_flushed = 1; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); } } @@ -3129,7 +3118,7 @@ try_again: * gotten a log write error. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); @@ -3143,13 +3132,13 @@ try_again: return XFS_ERROR(EIO); *log_flushed = 1; } else { /* just return */ - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } return 0; } while (iclog != log->l_iclog); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return 0; } /* xlog_state_sync */ @@ -3158,12 +3147,10 @@ try_again: * Called when we want to mark the current iclog as being ready to sync to * disk. 
*/ -void +STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - SPLDECL(s); - - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); @@ -3172,7 +3159,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_want_sync */ @@ -3193,16 +3180,15 @@ xlog_state_ticket_alloc(xlog_t *log) xlog_ticket_t *t_list; xlog_ticket_t *next; xfs_caddr_t buf; - uint i = (NBPP / sizeof(xlog_ticket_t)) - 2; - SPLDECL(s); + uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2; /* * The kmem_zalloc may sleep, so we shouldn't be holding the * global lock. XXXmiken: may want to use zone allocator. */ - buf = (xfs_caddr_t) kmem_zalloc(NBPP, KM_SLEEP); + buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); /* Attach 1st ticket to Q, so we can keep track of allocated memory */ t_list = (xlog_ticket_t *)buf; @@ -3231,7 +3217,7 @@ xlog_state_ticket_alloc(xlog_t *log) } t_list->t_next = NULL; log->l_tail = t_list; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_ticket_alloc */ @@ -3273,7 +3259,7 @@ xlog_ticket_put(xlog_t *log, /* * Grab ticket off freelist or allocation some more */ -xlog_ticket_t * +STATIC xlog_ticket_t * xlog_ticket_get(xlog_t *log, int unit_bytes, int cnt, @@ -3282,15 +3268,14 @@ xlog_ticket_get(xlog_t *log, { xlog_ticket_t *tic; uint num_headers; - SPLDECL(s); alloc: if (log->l_freelist == NULL) xlog_state_ticket_alloc(log); /* potentially sleep */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (log->l_freelist == NULL) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); goto alloc; } tic = log->l_freelist; @@ -3298,7 +3283,7 @@ xlog_ticket_get(xlog_t *log, if (log->l_freelist == NULL) log->l_tail = NULL; log->l_ticket_cnt--; - LOG_UNLOCK(log, s); + 
spin_unlock(&log->l_icloglock); /* * Permanent reservations have up to 'cnt'-1 active log operations @@ -3473,10 +3458,9 @@ xlog_verify_iclog(xlog_t *log, __uint8_t clientid; int len, i, j, k, op_len; int idx; - SPLDECL(s); /* check validity of iclog pointers */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); icptr = log->l_iclog; for (i=0; i < log->l_iclog_bufs; i++) { if (icptr == NULL) @@ -3485,21 +3469,21 @@ xlog_verify_iclog(xlog_t *log, } if (icptr != log->l_iclog) xlog_panic("xlog_verify_iclog: corrupt iclog ring"); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* check log magic numbers */ - ptr = (xfs_caddr_t) &(iclog->ic_header); - if (INT_GET(*(uint *)ptr, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) + if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) xlog_panic("xlog_verify_iclog: invalid magic num"); - for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&(iclog->ic_header))+count; + ptr = (xfs_caddr_t) &iclog->ic_header; + for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (INT_GET(*(uint *)ptr, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) xlog_panic("xlog_verify_iclog: unexpected magic num"); } /* check fields */ - len = INT_GET(iclog->ic_header.h_num_logops, ARCH_CONVERT); + len = be32_to_cpu(iclog->ic_header.h_num_logops); ptr = iclog->ic_datap; base_ptr = ptr; ophead = (xlog_op_header_t *)ptr; @@ -3517,9 +3501,11 @@ xlog_verify_iclog(xlog_t *log, if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - clientid = GET_CLIENT_ID(xhdr[j].hic_xheader.xh_cycle_data[k], ARCH_CONVERT); + clientid = xlog_get_client_id( + xhdr[j].hic_xheader.xh_cycle_data[k]); } else { - clientid = GET_CLIENT_ID(iclog->ic_header.h_cycle_data[idx], ARCH_CONVERT); + clientid = xlog_get_client_id( + iclog->ic_header.h_cycle_data[idx]); } } if (clientid != XFS_TRANSACTION && clientid != 
XFS_LOG) @@ -3531,16 +3517,16 @@ xlog_verify_iclog(xlog_t *log, field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); if (syncing == B_FALSE || (field_offset & 0x1ff)) { - op_len = INT_GET(ophead->oh_len, ARCH_CONVERT); + op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((__psint_t)&ophead->oh_len - (__psint_t)iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - op_len = INT_GET(xhdr[j].hic_xheader.xh_cycle_data[k], ARCH_CONVERT); + op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]); } else { - op_len = INT_GET(iclog->ic_header.h_cycle_data[idx], ARCH_CONVERT); + op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]); } } ptr += sizeof(xlog_op_header_t) + op_len; @@ -3549,7 +3535,7 @@ xlog_verify_iclog(xlog_t *log, #endif /* - * Mark all iclogs IOERROR. LOG_LOCK is held by the caller. + * Mark all iclogs IOERROR. l_icloglock is held by the caller. */ STATIC int xlog_state_ioerror( @@ -3597,8 +3583,6 @@ xfs_log_force_umount( xlog_t *log; int retval; int dummy; - SPLDECL(s); - SPLDECL(s2); log = mp->m_log; @@ -3627,8 +3611,8 @@ xfs_log_force_umount( * before we mark the filesystem SHUTDOWN and wake * everybody up to tell the bad news. */ - s = GRANT_LOCK(log); - s2 = LOG_LOCK(log); + spin_lock(&log->l_grant_lock); + spin_lock(&log->l_icloglock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; XFS_BUF_DONE(mp->m_sb_bp); /* @@ -3644,7 +3628,7 @@ xfs_log_force_umount( */ if (logerror) retval = xlog_state_ioerror(log); - LOG_UNLOCK(log, s2); + spin_unlock(&log->l_icloglock); /* * We don't want anybody waiting for log reservations @@ -3667,7 +3651,7 @@ xfs_log_force_umount( tic = tic->t_next; } while (tic != log->l_write_headq); } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); @@ -3676,9 +3660,9 @@ xfs_log_force_umount( * log down completely. 
*/ xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); - s2 = LOG_LOCK(log); + spin_lock(&log->l_icloglock); retval = xlog_state_ioerror(log); - LOG_UNLOCK(log, s2); + spin_unlock(&log->l_icloglock); } /* * Wake up everybody waiting on xfs_log_force. @@ -3691,13 +3675,13 @@ xfs_log_force_umount( { xlog_in_core_t *iclog; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; do { ASSERT(iclog->ic_callback == 0); iclog = iclog->ic_next; } while (iclog != log->l_iclog); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } #endif /* return non-zero if log IOERROR transition had already happened */ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index ebbe93f4f97b..4cdac048df5e 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -22,8 +22,9 @@ #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) #define BLOCK_LSN(lsn) ((uint)(lsn)) + /* this is used in a spot where we might otherwise double-endian-flip */ -#define CYCLE_LSN_DISK(lsn) (((uint *)&(lsn))[0]) +#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0]) #ifdef __KERNEL__ /* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 752f964b3699..e008233ee249 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -55,32 +55,21 @@ struct xfs_mount; BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) -/* - * set lsns - */ -#define ASSIGN_ANY_LSN_HOST(lsn,cycle,block) \ - { \ - (lsn) = ((xfs_lsn_t)(cycle)<<32)|(block); \ - } -#define ASSIGN_ANY_LSN_DISK(lsn,cycle,block) \ - { \ - INT_SET(((uint *)&(lsn))[0], ARCH_CONVERT, (cycle)); \ - INT_SET(((uint *)&(lsn))[1], ARCH_CONVERT, (block)); \ - } -#define ASSIGN_LSN(lsn,log) \ - ASSIGN_ANY_LSN_DISK(lsn,(log)->l_curr_cycle,(log)->l_curr_block); - -#define XLOG_SET(f,b) (((f) & (b)) == (b)) - -#define GET_CYCLE(ptr, arch) \ - (INT_GET(*(uint *)(ptr), arch) == XLOG_HEADER_MAGIC_NUM ? 
\ - INT_GET(*((uint *)(ptr)+1), arch) : \ - INT_GET(*(uint *)(ptr), arch) \ - ) +static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) +{ + return ((xfs_lsn_t)cycle << 32) | block; +} -#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) +static inline uint xlog_get_cycle(char *ptr) +{ + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) + return be32_to_cpu(*((__be32 *)ptr + 1)); + else + return be32_to_cpu(*(__be32 *)ptr); +} +#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) #ifdef __KERNEL__ @@ -96,19 +85,10 @@ struct xfs_mount; * * this has endian issues, of course. */ - -#ifndef XFS_NATIVE_HOST -#define GET_CLIENT_ID(i,arch) \ - ((i) & 0xff) -#else -#define GET_CLIENT_ID(i,arch) \ - ((i) >> 24) -#endif - -#define GRANT_LOCK(log) mutex_spinlock(&(log)->l_grant_lock) -#define GRANT_UNLOCK(log, s) mutex_spinunlock(&(log)->l_grant_lock, s) -#define LOG_LOCK(log) mutex_spinlock(&(log)->l_icloglock) -#define LOG_UNLOCK(log, s) mutex_spinunlock(&(log)->l_icloglock, s) +static inline uint xlog_get_client_id(__be32 i) +{ + return be32_to_cpu(i) >> 24; +} #define xlog_panic(args...) cmn_err(CE_PANIC, ## args) #define xlog_exit(args...) 
cmn_err(CE_PANIC, ## args) @@ -285,11 +265,11 @@ typedef struct xlog_ticket { typedef struct xlog_op_header { - xlog_tid_t oh_tid; /* transaction id of operation : 4 b */ - int oh_len; /* bytes in data region : 4 b */ - __uint8_t oh_clientid; /* who sent me this : 1 b */ - __uint8_t oh_flags; /* : 1 b */ - ushort oh_res2; /* 32 bit align : 2 b */ + __be32 oh_tid; /* transaction id of operation : 4 b */ + __be32 oh_len; /* bytes in data region : 4 b */ + __u8 oh_clientid; /* who sent me this : 1 b */ + __u8 oh_flags; /* : 1 b */ + __u16 oh_res2; /* 32 bit align : 2 b */ } xlog_op_header_t; @@ -307,25 +287,25 @@ typedef struct xlog_op_header { #endif typedef struct xlog_rec_header { - uint h_magicno; /* log record (LR) identifier : 4 */ - uint h_cycle; /* write cycle of log : 4 */ - int h_version; /* LR version : 4 */ - int h_len; /* len in bytes; should be 64-bit aligned: 4 */ - xfs_lsn_t h_lsn; /* lsn of this LR : 8 */ - xfs_lsn_t h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ - uint h_chksum; /* may not be used; non-zero if used : 4 */ - int h_prev_block; /* block number to previous LR : 4 */ - int h_num_logops; /* number of log operations in this LR : 4 */ - uint h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; + __be32 h_magicno; /* log record (LR) identifier : 4 */ + __be32 h_cycle; /* write cycle of log : 4 */ + __be32 h_version; /* LR version : 4 */ + __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ + __be64 h_lsn; /* lsn of this LR : 8 */ + __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ + __be32 h_chksum; /* may not be used; non-zero if used : 4 */ + __be32 h_prev_block; /* block number to previous LR : 4 */ + __be32 h_num_logops; /* number of log operations in this LR : 4 */ + __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* new fields */ - int h_fmt; /* format of log record : 4 */ - uuid_t h_fs_uuid; /* uuid of FS : 16 */ - int h_size; /* iclog size : 4 */ + __be32 h_fmt; /* format of log record : 4 */ + 
uuid_t h_fs_uuid; /* uuid of FS : 16 */ + __be32 h_size; /* iclog size : 4 */ } xlog_rec_header_t; typedef struct xlog_rec_ext_header { - uint xh_cycle; /* write cycle of log : 4 */ - uint xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ + __be32 xh_cycle; /* write cycle of log : 4 */ + __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ } xlog_rec_ext_header_t; #ifdef __KERNEL__ @@ -415,7 +395,7 @@ typedef struct log { xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ xlog_ticket_t *l_tail; /* free list of tickets */ xlog_in_core_t *l_iclog; /* head log queue */ - lock_t l_icloglock; /* grab to change iclog state */ + spinlock_t l_icloglock; /* grab to change iclog state */ xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed * buffers */ xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ @@ -439,7 +419,7 @@ typedef struct log { char *l_iclog_bak[XLOG_MAX_ICLOGS]; /* The following block of fields are changed while holding grant_lock */ - lock_t l_grant_lock; + spinlock_t l_grant_lock; xlog_ticket_t *l_reserve_headq; xlog_ticket_t *l_write_headq; int l_grant_reserve_cycle; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 851eca8a7150..b82d5d4d2462 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -198,7 +198,7 @@ xlog_header_check_dump( cmn_err(CE_DEBUG, " log : uuid = "); for (b = 0; b < 16; b++) cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); - cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); + cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); } #else #define xlog_header_check_dump(mp, head) @@ -212,14 +212,14 @@ xlog_header_check_recover( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); /* * IRIX doesn't write the h_fmt field and leaves it zeroed * (XLOG_FMT_UNKNOWN). 
This stops us from trying to recover * a dirty log created in IRIX. */ - if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) { + if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { xlog_warn( "XFS: dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); @@ -245,7 +245,7 @@ xlog_header_check_mount( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); if (uuid_is_nil(&head->h_fs_uuid)) { /* @@ -293,7 +293,7 @@ xlog_recover_iodone( * Note that the algorithm can not be perfect because the disk will not * necessarily be perfect. */ -int +STATIC int xlog_find_cycle_start( xlog_t *log, xfs_buf_t *bp, @@ -311,7 +311,7 @@ xlog_find_cycle_start( if ((error = xlog_bread(log, mid_blk, 1, bp))) return error; offset = xlog_align(log, mid_blk, 1, bp); - mid_cycle = GET_CYCLE(offset, ARCH_CONVERT); + mid_cycle = xlog_get_cycle(offset); if (mid_cycle == cycle) { *last_blk = mid_blk; /* last_half_cycle == mid_cycle */ @@ -371,7 +371,7 @@ xlog_find_verify_cycle( buf = xlog_align(log, i, bcount, bp); for (j = 0; j < bcount; j++) { - cycle = GET_CYCLE(buf, ARCH_CONVERT); + cycle = xlog_get_cycle(buf); if (cycle == stop_on_cycle_no) { *new_blk = i+j; goto out; @@ -447,8 +447,7 @@ xlog_find_verify_log_record( head = (xlog_rec_header_t *)offset; - if (XLOG_HEADER_MAGIC_NUM == - INT_GET(head->h_magicno, ARCH_CONVERT)) + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) break; if (!smallmem) @@ -480,7 +479,7 @@ xlog_find_verify_log_record( * record do we update last_blk. 
*/ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - uint h_size = INT_GET(head->h_size, ARCH_CONVERT); + uint h_size = be32_to_cpu(head->h_size); xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; if (h_size % XLOG_HEADER_CYCLE_SIZE) @@ -489,8 +488,8 @@ xlog_find_verify_log_record( xhdrs = 1; } - if (*last_blk - i + extra_bblks - != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs) + if (*last_blk - i + extra_bblks != + BTOBB(be32_to_cpu(head->h_len)) + xhdrs) *last_blk = i; out: @@ -550,13 +549,13 @@ xlog_find_head( if ((error = xlog_bread(log, 0, 1, bp))) goto bp_err; offset = xlog_align(log, 0, 1, bp); - first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + first_half_cycle = xlog_get_cycle(offset); last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ if ((error = xlog_bread(log, last_blk, 1, bp))) goto bp_err; offset = xlog_align(log, last_blk, 1, bp); - last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + last_half_cycle = xlog_get_cycle(offset); ASSERT(last_half_cycle != 0); /* @@ -808,7 +807,7 @@ xlog_find_tail( if ((error = xlog_bread(log, 0, 1, bp))) goto bread_err; offset = xlog_align(log, 0, 1, bp); - if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { + if (xlog_get_cycle(offset) == 0) { *tail_blk = 0; /* leave all other log inited values alone */ goto exit; @@ -823,8 +822,7 @@ xlog_find_tail( if ((error = xlog_bread(log, i, 1, bp))) goto bread_err; offset = xlog_align(log, i, 1, bp); - if (XLOG_HEADER_MAGIC_NUM == - INT_GET(*(uint *)offset, ARCH_CONVERT)) { + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { found = 1; break; } @@ -841,7 +839,7 @@ xlog_find_tail( goto bread_err; offset = xlog_align(log, i, 1, bp); if (XLOG_HEADER_MAGIC_NUM == - INT_GET(*(uint*)offset, ARCH_CONVERT)) { + be32_to_cpu(*(__be32 *)offset)) { found = 2; break; } @@ -855,7 +853,7 @@ xlog_find_tail( /* find blk_no of tail of log */ rhead = (xlog_rec_header_t *)offset; - *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT)); + *tail_blk = 
BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); /* * Reset log values according to the state of the log when we @@ -869,11 +867,11 @@ xlog_find_tail( */ log->l_prev_block = i; log->l_curr_block = (int)*head_blk; - log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); + log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); if (found == 2) log->l_curr_cycle++; - log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); - log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); + log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); + log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); log->l_grant_reserve_cycle = log->l_curr_cycle; log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); log->l_grant_write_cycle = log->l_curr_cycle; @@ -891,8 +889,8 @@ xlog_find_tail( * unmount record rather than the block after it. */ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); - int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); + int h_size = be32_to_cpu(rhead->h_size); + int h_version = be32_to_cpu(rhead->h_version); if ((h_version & XLOG_VERSION_2) && (h_size > XLOG_HEADER_CYCLE_SIZE)) { @@ -906,10 +904,10 @@ xlog_find_tail( hblks = 1; } after_umount_blk = (i + hblks + (int) - BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; + BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; tail_lsn = log->l_tail_lsn; if (*head_blk == after_umount_blk && - INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { + be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = (i + hblks) % log->l_logBBsize; if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { goto bread_err; @@ -922,10 +920,12 @@ xlog_find_tail( * log records will point recovery to after the * current unmount record. 
*/ - ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle, - after_umount_blk); - ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, - after_umount_blk); + log->l_tail_lsn = + xlog_assign_lsn(log->l_curr_cycle, + after_umount_blk); + log->l_last_sync_lsn = + xlog_assign_lsn(log->l_curr_cycle, + after_umount_blk); *tail_blk = after_umount_blk; /* @@ -986,7 +986,7 @@ exit: * -1 => use *blk_no as the first block of the log * >0 => error has occurred */ -int +STATIC int xlog_find_zeroed( xlog_t *log, xfs_daddr_t *blk_no) @@ -1007,7 +1007,7 @@ xlog_find_zeroed( if ((error = xlog_bread(log, 0, 1, bp))) goto bp_err; offset = xlog_align(log, 0, 1, bp); - first_cycle = GET_CYCLE(offset, ARCH_CONVERT); + first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; xlog_put_bp(bp); @@ -1018,7 +1018,7 @@ xlog_find_zeroed( if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) goto bp_err; offset = xlog_align(log, log_bbnum-1, 1, bp); - last_cycle = GET_CYCLE(offset, ARCH_CONVERT); + last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); return 0; @@ -1098,13 +1098,13 @@ xlog_add_record( xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; memset(buf, 0, BBSIZE); - INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); - INT_SET(recp->h_cycle, ARCH_CONVERT, cycle); - INT_SET(recp->h_version, ARCH_CONVERT, + recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); + recp->h_cycle = cpu_to_be32(cycle); + recp->h_version = cpu_to_be32( XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 
2 : 1); - ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block); - ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block); - INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT); + recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); + recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); + recp->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); } @@ -2211,7 +2211,7 @@ xlog_recover_do_buffer_trans( * overlap with future reads of those inodes. */ if (XFS_DINODE_MAGIC == - INT_GET(*((__uint16_t *)(xfs_buf_offset(bp, 0))), ARCH_CONVERT) && + be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { XFS_BUF_STALE(bp); @@ -2581,8 +2581,7 @@ xlog_recover_do_dquot_trans( /* * This type of quotas was turned off, so ignore this record. */ - type = INT_GET(recddq->d_flags, ARCH_CONVERT) & - (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); + type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); ASSERT(type); if (log->l_quotaoffs_flag & type) return (0); @@ -2660,7 +2659,6 @@ xlog_recover_do_efi_trans( xfs_mount_t *mp; xfs_efi_log_item_t *efip; xfs_efi_log_format_t *efi_formatp; - SPLDECL(s); if (pass == XLOG_RECOVER_PASS1) { return 0; @@ -2678,11 +2676,11 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); /* * xfs_trans_update_ail() drops the AIL lock. */ - xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s); + xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); return 0; } @@ -2707,7 +2705,6 @@ xlog_recover_do_efd_trans( xfs_log_item_t *lip; int gen; __uint64_t efi_id; - SPLDECL(s); if (pass == XLOG_RECOVER_PASS1) { return; @@ -2725,7 +2722,7 @@ xlog_recover_do_efd_trans( * in the AIL. 
*/ mp = log->l_mp; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); lip = xfs_trans_first_ail(mp, &gen); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { @@ -2735,22 +2732,14 @@ xlog_recover_do_efd_trans( * xfs_trans_delete_ail() drops the * AIL lock. */ - xfs_trans_delete_ail(mp, lip, s); - break; + xfs_trans_delete_ail(mp, lip); + xfs_efi_item_free(efip); + return; } } lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } - - /* - * If we found it, then free it up. If it wasn't there, it - * must have been overwritten in the log. Oh well. - */ - if (lip != NULL) { - xfs_efi_item_free(efip); - } else { - AIL_UNLOCK(mp, s); - } + spin_unlock(&mp->m_ail_lock); } /* @@ -2897,8 +2886,8 @@ xlog_recover_process_data( unsigned long hash; uint flags; - lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT); - num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT); + lp = dp + be32_to_cpu(rhead->h_len); + num_logops = be32_to_cpu(rhead->h_num_logops); /* check the log format matches our own - else we can't recover */ if (xlog_header_check_recover(log->l_mp, rhead)) @@ -2915,15 +2904,20 @@ xlog_recover_process_data( ASSERT(0); return (XFS_ERROR(EIO)); } - tid = INT_GET(ohead->oh_tid, ARCH_CONVERT); + tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); trans = xlog_recover_find_tid(rhash[hash], tid); if (trans == NULL) { /* not found; add new tid */ if (ohead->oh_flags & XLOG_START_TRANS) xlog_recover_new_tid(&rhash[hash], tid, - INT_GET(rhead->h_lsn, ARCH_CONVERT)); + be64_to_cpu(rhead->h_lsn)); } else { - ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp); + if (dp + be32_to_cpu(ohead->oh_len) > lp) { + xlog_warn( + "XFS: xlog_recover_process_data: bad length"); + WARN_ON(1); + return (XFS_ERROR(EIO)); + } flags = ohead->oh_flags & ~XLOG_END_TRANS; if (flags & XLOG_WAS_CONT_TRANS) flags &= ~XLOG_CONTINUE_TRANS; @@ -2937,8 +2931,7 @@ xlog_recover_process_data( break; case XLOG_WAS_CONT_TRANS: error = xlog_recover_add_to_cont_trans(trans, - dp, INT_GET(ohead->oh_len, - 
ARCH_CONVERT)); + dp, be32_to_cpu(ohead->oh_len)); break; case XLOG_START_TRANS: xlog_warn( @@ -2949,8 +2942,7 @@ xlog_recover_process_data( case 0: case XLOG_CONTINUE_TRANS: error = xlog_recover_add_to_trans(trans, - dp, INT_GET(ohead->oh_len, - ARCH_CONVERT)); + dp, be32_to_cpu(ohead->oh_len)); break; default: xlog_warn( @@ -2962,7 +2954,7 @@ xlog_recover_process_data( if (error) return error; } - dp += INT_GET(ohead->oh_len, ARCH_CONVERT); + dp += be32_to_cpu(ohead->oh_len); num_logops--; } return 0; @@ -3075,10 +3067,9 @@ xlog_recover_process_efis( xfs_efi_log_item_t *efip; int gen; xfs_mount_t *mp; - SPLDECL(s); mp = log->l_mp; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); lip = xfs_trans_first_ail(mp, &gen); while (lip != NULL) { @@ -3099,12 +3090,12 @@ xlog_recover_process_efis( continue; } - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xlog_recover_process_efi(mp, efip); - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } /* @@ -3315,16 +3306,16 @@ xlog_pack_data_checksum( int size) { int i; - uint *up; + __be32 *up; uint chksum = 0; - up = (uint *)iclog->ic_datap; + up = (__be32 *)iclog->ic_datap; /* divide length by 4 to get # words */ for (i = 0; i < (size >> 2); i++) { - chksum ^= INT_GET(*up, ARCH_CONVERT); + chksum ^= be32_to_cpu(*up); up++; } - INT_SET(iclog->ic_header.h_chksum, ARCH_CONVERT, chksum); + iclog->ic_header.h_chksum = cpu_to_be32(chksum); } #else #define xlog_pack_data_checksum(log, iclog, size) @@ -3341,7 +3332,7 @@ xlog_pack_data( { int i, j, k; int size = iclog->ic_offset + roundoff; - uint cycle_lsn; + __be32 cycle_lsn; xfs_caddr_t dp; xlog_in_core_2_t *xhdr; @@ -3352,8 +3343,8 @@ xlog_pack_data( dp = iclog->ic_datap; for (i = 0; i < BTOBB(size) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { - iclog->ic_header.h_cycle_data[i] = *(uint *)dp; - *(uint *)dp = cycle_lsn; + iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; + 
*(__be32 *)dp = cycle_lsn; dp += BBSIZE; } @@ -3362,8 +3353,8 @@ xlog_pack_data( for ( ; i < BTOBB(size); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - xhdr[j].hic_xheader.xh_cycle_data[k] = *(uint *)dp; - *(uint *)dp = cycle_lsn; + xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; + *(__be32 *)dp = cycle_lsn; dp += BBSIZE; } @@ -3380,21 +3371,21 @@ xlog_unpack_data_checksum( xfs_caddr_t dp, xlog_t *log) { - uint *up = (uint *)dp; + __be32 *up = (__be32 *)dp; uint chksum = 0; int i; /* divide length by 4 to get # words */ - for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) { - chksum ^= INT_GET(*up, ARCH_CONVERT); + for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) { + chksum ^= be32_to_cpu(*up); up++; } - if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) { + if (chksum != be32_to_cpu(rhead->h_chksum)) { if (rhead->h_chksum || ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { cmn_err(CE_DEBUG, "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", - INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum); + be32_to_cpu(rhead->h_chksum), chksum); cmn_err(CE_DEBUG, "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { @@ -3418,18 +3409,18 @@ xlog_unpack_data( int i, j, k; xlog_in_core_2_t *xhdr; - for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) && + for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { - *(uint *)dp = *(uint *)&rhead->h_cycle_data[i]; + *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; dp += BBSIZE; } if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { xhdr = (xlog_in_core_2_t *)rhead; - for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) { + for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; + *(__be32 *)dp = 
xhdr[j].hic_xheader.xh_cycle_data[k]; dp += BBSIZE; } } @@ -3445,24 +3436,21 @@ xlog_valid_rec_header( { int hlen; - if (unlikely( - (INT_GET(rhead->h_magicno, ARCH_CONVERT) != - XLOG_HEADER_MAGIC_NUM))) { + if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); return XFS_ERROR(EFSCORRUPTED); } if (unlikely( (!rhead->h_version || - (INT_GET(rhead->h_version, ARCH_CONVERT) & - (~XLOG_VERSION_OKBITS)) != 0))) { + (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { xlog_warn("XFS: %s: unrecognised log version (%d).", - __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT)); + __FUNCTION__, be32_to_cpu(rhead->h_version)); return XFS_ERROR(EIO); } /* LR body must have data or it wouldn't have been written */ - hlen = INT_GET(rhead->h_len, ARCH_CONVERT); + hlen = be32_to_cpu(rhead->h_len); if (unlikely( hlen <= 0 || hlen > INT_MAX )) { XFS_ERROR_REPORT("xlog_valid_rec_header(2)", XFS_ERRLEVEL_LOW, log->l_mp); @@ -3522,9 +3510,8 @@ xlog_do_recovery_pass( error = xlog_valid_rec_header(log, rhead, tail_blk); if (error) goto bread_err1; - h_size = INT_GET(rhead->h_size, ARCH_CONVERT); - if ((INT_GET(rhead->h_version, ARCH_CONVERT) - & XLOG_VERSION_2) && + h_size = be32_to_cpu(rhead->h_size); + if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && (h_size > XLOG_HEADER_CYCLE_SIZE)) { hblks = h_size / XLOG_HEADER_CYCLE_SIZE; if (h_size % XLOG_HEADER_CYCLE_SIZE) @@ -3561,7 +3548,7 @@ xlog_do_recovery_pass( goto bread_err2; /* blocks in data section */ - bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); error = xlog_bread(log, blk_no + hblks, bblks, dbp); if (error) goto bread_err2; @@ -3636,7 +3623,7 @@ xlog_do_recovery_pass( if (error) goto bread_err2; - bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); blk_no += hblks; /* Read in data for log record */ @@ -3707,7 
+3694,7 @@ xlog_do_recovery_pass( error = xlog_valid_rec_header(log, rhead, blk_no); if (error) goto bread_err2; - bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) goto bread_err2; offset = xlog_align(log, blk_no+hblks, bblks, dbp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ebdb76da527c..6409b3762995 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -136,15 +136,9 @@ xfs_mount_init(void) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; } - AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); - spinlock_init(&mp->m_sb_lock, "xfs_sb"); + spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_ilock); mutex_init(&mp->m_growlock); - /* - * Initialize the AIL. - */ - xfs_trans_ail_init(mp); - atomic_set(&mp->m_active_trans, 0); return mp; @@ -171,7 +165,7 @@ xfs_mount_free( sizeof(xfs_perag_t) * mp->m_sb.sb_agcount); } - AIL_LOCK_DESTROY(&mp->m_ail_lock); + spinlock_destroy(&mp->m_ail_lock); spinlock_destroy(&mp->m_sb_lock); mutex_destroy(&mp->m_ilock); mutex_destroy(&mp->m_growlock); @@ -616,7 +610,7 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) int i; mp->m_agfrotor = mp->m_agirotor = 0; - spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock"); + spin_lock_init(&mp->m_agirotor_lock); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; @@ -696,7 +690,6 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) uint64_t bfreelst = 0; uint64_t btree = 0; int error; - int s; for (index = 0; index < agcount; index++) { /* @@ -721,11 +714,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) /* * Overwrite incore superblock counters with just-read data */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); sbp->sb_ifree = ifree; sbp->sb_icount = ialloc; sbp->sb_fdblocks = bfree + bfreelst + btree; - XFS_SB_UNLOCK(mp, s); + 
spin_unlock(&mp->m_sb_lock); /* Fixup the per-cpu counters as well. */ xfs_icsb_reinit_counters(mp); @@ -734,49 +727,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) } /* - * xfs_mountfs - * - * This function does the following on an initial mount of a file system: - * - reads the superblock from disk and init the mount struct - * - if we're a 32-bit kernel, do a size check on the superblock - * so we don't mount terabyte filesystems - * - init mount struct realtime fields - * - allocate inode hash table for fs - * - init directory manager - * - perform recovery and init the log manager + * Update alignment values based on mount options and sb values */ -int -xfs_mountfs( - xfs_mount_t *mp, - int mfsi_flags) +STATIC int +xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) { - xfs_buf_t *bp; xfs_sb_t *sbp = &(mp->m_sb); - xfs_inode_t *rip; - bhv_vnode_t *rvp = NULL; - int readio_log, writeio_log; - xfs_daddr_t d; - __uint64_t resblks; - __int64_t update_flags; - uint quotamount, quotaflags; - int agno; - int uuid_mounted = 0; - int error = 0; - if (mp->m_sb_bp == NULL) { - if ((error = xfs_readsb(mp, mfsi_flags))) { - return error; - } - } - xfs_mount_common(mp, sbp); - - /* - * Check if sb_agblocks is aligned at stripe boundary - * If sb_agblocks is NOT aligned turn off m_dalign since - * allocator alignment is within an ag, therefore ag has - * to be aligned at stripe boundary. 
- */ - update_flags = 0LL; if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { /* * If stripe unit and stripe width are not multiples @@ -787,8 +744,7 @@ xfs_mountfs( if (mp->m_flags & XFS_MOUNT_RETERR) { cmn_err(CE_WARN, "XFS: alignment check 1 failed"); - error = XFS_ERROR(EINVAL); - goto error1; + return XFS_ERROR(EINVAL); } mp->m_dalign = mp->m_swidth = 0; } else { @@ -798,8 +754,7 @@ xfs_mountfs( mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - error = XFS_ERROR(EINVAL); - goto error1; + return XFS_ERROR(EINVAL); } xfs_fs_cmn_err(CE_WARN, mp, "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", @@ -816,8 +771,7 @@ xfs_mountfs( "stripe alignment turned off: sunit(%d) less than bsize(%d)", mp->m_dalign, mp->m_blockmask +1); - error = XFS_ERROR(EINVAL); - goto error1; + return XFS_ERROR(EINVAL); } mp->m_swidth = 0; } @@ -830,11 +784,11 @@ xfs_mountfs( if (XFS_SB_VERSION_HASDALIGN(sbp)) { if (sbp->sb_unit != mp->m_dalign) { sbp->sb_unit = mp->m_dalign; - update_flags |= XFS_SB_UNIT; + *update_flags |= XFS_SB_UNIT; } if (sbp->sb_width != mp->m_swidth) { sbp->sb_width = mp->m_swidth; - update_flags |= XFS_SB_WIDTH; + *update_flags |= XFS_SB_WIDTH; } } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && @@ -843,49 +797,45 @@ xfs_mountfs( mp->m_swidth = sbp->sb_width; } - xfs_alloc_compute_maxlevels(mp); - xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); - xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); - xfs_ialloc_compute_maxlevels(mp); + return 0; +} - if (sbp->sb_imax_pct) { - __uint64_t icount; +/* + * Set the maximum inode count for this filesystem + */ +STATIC void +xfs_set_maxicount(xfs_mount_t *mp) +{ + xfs_sb_t *sbp = &(mp->m_sb); + __uint64_t icount; - /* Make sure the maximum inode count is a multiple of the - * units we allocate inodes in. 
+ if (sbp->sb_imax_pct) { + /* + * Make sure the maximum inode count is a multiple + * of the units we allocate inodes in. */ - icount = sbp->sb_dblocks * sbp->sb_imax_pct; do_div(icount, 100); do_div(icount, mp->m_ialloc_blks); mp->m_maxicount = (icount * mp->m_ialloc_blks) << sbp->sb_inopblog; - } else + } else { mp->m_maxicount = 0; - - mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); - - /* - * XFS uses the uuid from the superblock as the unique - * identifier for fsid. We can not use the uuid from the volume - * since a single partition filesystem is identical to a single - * partition volume/filesystem. - */ - if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && - (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { - if (xfs_uuid_mount(mp)) { - error = XFS_ERROR(EINVAL); - goto error1; - } - uuid_mounted=1; } +} + +/* + * Set the default minimum read and write sizes unless + * already specified in a mount option. + * We use smaller I/O sizes when the file system + * is being used for NFS service (wsync mount option). + */ +STATIC void +xfs_set_rw_sizes(xfs_mount_t *mp) +{ + xfs_sb_t *sbp = &(mp->m_sb); + int readio_log, writeio_log; - /* - * Set the default minimum read and write sizes unless - * already specified in a mount option. - * We use smaller I/O sizes when the file system - * is being used for NFS service (wsync mount option). - */ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { if (mp->m_flags & XFS_MOUNT_WSYNC) { readio_log = XFS_WSYNC_READIO_LOG; @@ -911,17 +861,14 @@ xfs_mountfs( mp->m_writeio_log = writeio_log; } mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); +} - /* - * Set the inode cluster size. - * This may still be overridden by the file system - * block size if it is larger than the chosen cluster size. - */ - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; - - /* - * Set whether we're using inode alignment. - */ +/* + * Set whether we're using inode alignment. 
+ */ +STATIC void +xfs_set_inoalignment(xfs_mount_t *mp) +{ if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) @@ -937,14 +884,22 @@ xfs_mountfs( mp->m_sinoalign = mp->m_dalign; else mp->m_sinoalign = 0; - /* - * Check that the data (and log if separate) are an ok size. - */ +} + +/* + * Check that the data (and log if separate) are an ok size. + */ +STATIC int +xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) +{ + xfs_buf_t *bp; + xfs_daddr_t d; + int error; + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { cmn_err(CE_WARN, "XFS: size check 1 failed"); - error = XFS_ERROR(E2BIG); - goto error1; + return XFS_ERROR(E2BIG); } error = xfs_read_buf(mp, mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), @@ -953,10 +908,9 @@ xfs_mountfs( xfs_buf_relse(bp); } else { cmn_err(CE_WARN, "XFS: size check 2 failed"); - if (error == ENOSPC) { + if (error == ENOSPC) error = XFS_ERROR(E2BIG); - } - goto error1; + return error; } if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && @@ -964,8 +918,7 @@ xfs_mountfs( d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { cmn_err(CE_WARN, "XFS: size check 3 failed"); - error = XFS_ERROR(E2BIG); - goto error1; + return XFS_ERROR(E2BIG); } error = xfs_read_buf(mp, mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -974,17 +927,111 @@ xfs_mountfs( xfs_buf_relse(bp); } else { cmn_err(CE_WARN, "XFS: size check 3 failed"); - if (error == ENOSPC) { + if (error == ENOSPC) error = XFS_ERROR(E2BIG); - } + return error; + } + } + return 0; +} + +/* + * xfs_mountfs + * + * This function does the following on an initial mount of a file system: + * - reads the superblock from disk and init the mount struct + * - if we're a 32-bit kernel, do a size check on the superblock + * so we don't mount terabyte filesystems + * - init mount struct realtime fields + * - allocate inode hash table for fs 
+ * - init directory manager + * - perform recovery and init the log manager + */ +int +xfs_mountfs( + xfs_mount_t *mp, + int mfsi_flags) +{ + xfs_sb_t *sbp = &(mp->m_sb); + xfs_inode_t *rip; + bhv_vnode_t *rvp = NULL; + __uint64_t resblks; + __int64_t update_flags = 0LL; + uint quotamount, quotaflags; + int agno; + int uuid_mounted = 0; + int error = 0; + + if (mp->m_sb_bp == NULL) { + error = xfs_readsb(mp, mfsi_flags); + if (error) + return error; + } + xfs_mount_common(mp, sbp); + + /* + * Check if sb_agblocks is aligned at stripe boundary + * If sb_agblocks is NOT aligned turn off m_dalign since + * allocator alignment is within an ag, therefore ag has + * to be aligned at stripe boundary. + */ + error = xfs_update_alignment(mp, mfsi_flags, &update_flags); + if (error) + goto error1; + + xfs_alloc_compute_maxlevels(mp); + xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); + xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); + xfs_ialloc_compute_maxlevels(mp); + + xfs_set_maxicount(mp); + + mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); + + /* + * XFS uses the uuid from the superblock as the unique + * identifier for fsid. We can not use the uuid from the volume + * since a single partition filesystem is identical to a single + * partition volume/filesystem. + */ + if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && + (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { + if (xfs_uuid_mount(mp)) { + error = XFS_ERROR(EINVAL); goto error1; } + uuid_mounted=1; } /* + * Set the minimum read and write sizes + */ + xfs_set_rw_sizes(mp); + + /* + * Set the inode cluster size. + * This may still be overridden by the file system + * block size if it is larger than the chosen cluster size. + */ + mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + + /* + * Set inode alignment fields + */ + xfs_set_inoalignment(mp); + + /* + * Check that the data (and log if separate) are an ok size. 
+ */ + error = xfs_check_sizes(mp, mfsi_flags); + if (error) + goto error1; + + /* * Initialize realtime fields in the mount structure */ - if ((error = xfs_rtmount_init(mp))) { + error = xfs_rtmount_init(mp); + if (error) { cmn_err(CE_WARN, "XFS: RT mount failed"); goto error1; } @@ -1102,7 +1149,8 @@ xfs_mountfs( /* * Initialize realtime inode pointers in the mount structure */ - if ((error = xfs_rtmount_inodes(mp))) { + error = xfs_rtmount_inodes(mp); + if (error) { /* * Free up the root inode. */ @@ -1120,7 +1168,8 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags))) + error = XFS_QM_INIT(mp, &quotamount, &quotaflags); + if (error) goto error4; /* @@ -1137,7 +1186,8 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) + error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); + if (error) goto error4; /* @@ -1255,7 +1305,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) #if defined(DEBUG) || defined(INDUCE_IO_ERROR) xfs_errortag_clearall(mp, 0); #endif - XFS_IODONE(mp); xfs_mount_free(mp); return 0; } @@ -1441,7 +1490,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) * Fields are not allowed to dip below zero, so if the delta would * do this do not apply it and return EINVAL. * - * The SB_LOCK must be held when this routine is called. + * The m_sb_lock must be held when this routine is called. */ int xfs_mod_incore_sb_unlocked( @@ -1606,7 +1655,7 @@ xfs_mod_incore_sb_unlocked( /* * xfs_mod_incore_sb() is used to change a field in the in-core * superblock structure by the specified delta. This modification - * is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked() + * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked() * routine to do the work.
*/ int @@ -1616,7 +1665,6 @@ xfs_mod_incore_sb( int64_t delta, int rsvd) { - unsigned long s; int status; /* check for per-cpu counters */ @@ -1633,9 +1681,9 @@ xfs_mod_incore_sb( /* FALLTHROUGH */ #endif default: - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); break; } @@ -1656,7 +1704,6 @@ xfs_mod_incore_sb( int xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) { - unsigned long s; int status=0; xfs_mod_sb_t *msbp; @@ -1664,10 +1711,10 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) * Loop through the array of mod structures and apply each * individually. If any fail, then back out all those * which have already been applied. Do all of this within - * the scope of the SB_LOCK so that all of the changes will + * the scope of the m_sb_lock so that all of the changes will * be atomic. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); msbp = &msb[0]; for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { /* @@ -1681,11 +1728,11 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) case XFS_SBS_IFREE: case XFS_SBS_FDBLOCKS: if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); status = xfs_icsb_modify_counters(mp, msbp->msb_field, msbp->msb_delta, rsvd); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); break; } /* FALLTHROUGH */ @@ -1719,12 +1766,12 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) case XFS_SBS_IFREE: case XFS_SBS_FDBLOCKS: if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); status = xfs_icsb_modify_counters(mp, msbp->msb_field, -(msbp->msb_delta), rsvd); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); break; } /* FALLTHROUGH */ @@ -1740,7 +1787,7 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, 
xfs_mod_sb_t *msb, uint nmsb, int rsvd) msbp--; } } - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); return status; } @@ -1888,12 +1935,12 @@ xfs_mount_log_sbunit( * * Locking rules: * - * 1. XFS_SB_LOCK() before picking up per-cpu locks + * 1. m_sb_lock before picking up per-cpu locks * 2. per-cpu locks always picked up via for_each_online_cpu() order - * 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks + * 3. accurate counter sync requires m_sb_lock + per cpu locks * 4. modifying per-cpu counters requires holding per-cpu lock - * 5. modifying global counters requires holding XFS_SB_LOCK - * 6. enabling or disabling a counter requires holding the XFS_SB_LOCK + * 5. modifying global counters requires holding m_sb_lock + * 6. enabling or disabling a counter requires holding the m_sb_lock * and _none_ of the per-cpu locks. * * Disabled counters are only ever re-enabled by a balance operation @@ -1920,7 +1967,6 @@ xfs_icsb_cpu_notify( { xfs_icsb_cnts_t *cntp; xfs_mount_t *mp; - int s; mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); cntp = (xfs_icsb_cnts_t *) @@ -1946,7 +1992,7 @@ xfs_icsb_cpu_notify( * count into the total on the global superblock and * re-enable the counters. 
*/ xfs_icsb_lock(mp); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); @@ -1963,7 +2009,7 @@ xfs_icsb_cpu_notify( XFS_ICSB_SB_LOCKED, 0); xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED, 0); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_icsb_unlock(mp); break; } @@ -2194,11 +2240,10 @@ xfs_icsb_sync_counters_flags( int flags) { xfs_icsb_cnts_t cnt; - int s; /* Pass 1: lock all counters */ if ((flags & XFS_ICSB_SB_LOCKED) == 0) - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); xfs_icsb_count(mp, &cnt, flags); @@ -2211,7 +2256,7 @@ xfs_icsb_sync_counters_flags( mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; if ((flags & XFS_ICSB_SB_LOCKED) == 0) - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } /* @@ -2252,11 +2297,10 @@ xfs_icsb_balance_counter( { uint64_t count, resid; int weight = num_online_cpus(); - int s; uint64_t min = (uint64_t)min_per_cpu; if (!(flags & XFS_ICSB_SB_LOCKED)) - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); /* disable counter and sync counter */ xfs_icsb_disable_counter(mp, field); @@ -2290,10 +2334,10 @@ xfs_icsb_balance_counter( xfs_icsb_enable_counter(mp, field, count, resid); out: if (!(flags & XFS_ICSB_SB_LOCKED)) - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } -int +STATIC int xfs_icsb_modify_counters( xfs_mount_t *mp, xfs_sb_field_t field, @@ -2302,7 +2346,7 @@ xfs_icsb_modify_counters( { xfs_icsb_cnts_t *icsbp; long long lcounter; /* long counter for 64 bit fields */ - int cpu, ret = 0, s; + int cpu, ret = 0; might_sleep(); again: @@ -2380,15 +2424,15 @@ slow_path: * running atomically here, we know a rebalance cannot * be in progress. Hence we can go straight to operating * on the global superblock. We do not call xfs_mod_incore_sb() - * here even though we need to get the SB_LOCK. Doing so + * here even though we need to get the m_sb_lock. 
Doing so * will cause us to re-enter this function and deadlock. - * Hence we get the SB_LOCK ourselves and then call + * Hence we get the m_sb_lock ourselves and then call * xfs_mod_incore_sb_unlocked() as the unlocked path operates * directly on the global counters. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* * Now that we've modified the global superblock, we diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c618f7cb5f0e..f7c620ec6e69 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -56,20 +56,12 @@ struct cred; struct log; struct xfs_mount_args; struct xfs_inode; -struct xfs_iocore; struct xfs_bmbt_irec; struct xfs_bmap_free; struct xfs_extdelta; struct xfs_swapext; struct xfs_mru_cache; -#define AIL_LOCK_T lock_t -#define AIL_LOCKINIT(x,y) spinlock_init(x,y) -#define AIL_LOCK_DESTROY(x) spinlock_destroy(x) -#define AIL_LOCK(mp,s) s=mutex_spinlock(&(mp)->m_ail_lock) -#define AIL_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_ail_lock, s) - - /* * Prototypes and functions for the Data Migration subsystem. */ @@ -196,105 +188,6 @@ typedef struct xfs_qmops { #define XFS_QM_QUOTACTL(mp, cmd, id, addr) \ (*(mp)->m_qm_ops->xfs_quotactl)(mp, cmd, id, addr) - -/* - * Prototypes and functions for I/O core modularization. 
- */ - -typedef int (*xfs_ioinit_t)(struct xfs_mount *, - struct xfs_mount_args *, int); -typedef int (*xfs_bmapi_t)(struct xfs_trans *, void *, - xfs_fileoff_t, xfs_filblks_t, int, - xfs_fsblock_t *, xfs_extlen_t, - struct xfs_bmbt_irec *, int *, - struct xfs_bmap_free *, struct xfs_extdelta *); -typedef int (*xfs_bunmapi_t)(struct xfs_trans *, - void *, xfs_fileoff_t, - xfs_filblks_t, int, xfs_extnum_t, - xfs_fsblock_t *, struct xfs_bmap_free *, - struct xfs_extdelta *, int *); -typedef int (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *); -typedef int (*xfs_iomap_write_direct_t)( - void *, xfs_off_t, size_t, int, - struct xfs_bmbt_irec *, int *, int); -typedef int (*xfs_iomap_write_delay_t)( - void *, xfs_off_t, size_t, int, - struct xfs_bmbt_irec *, int *); -typedef int (*xfs_iomap_write_allocate_t)( - void *, xfs_off_t, size_t, - struct xfs_bmbt_irec *, int *); -typedef int (*xfs_iomap_write_unwritten_t)( - void *, xfs_off_t, size_t); -typedef uint (*xfs_lck_map_shared_t)(void *); -typedef void (*xfs_lock_t)(void *, uint); -typedef void (*xfs_lock_demote_t)(void *, uint); -typedef int (*xfs_lock_nowait_t)(void *, uint); -typedef void (*xfs_unlk_t)(void *, unsigned int); -typedef xfs_fsize_t (*xfs_size_t)(void *); -typedef xfs_fsize_t (*xfs_iodone_t)(struct xfs_mount *); -typedef int (*xfs_swap_extents_t)(void *, void *, - struct xfs_swapext*); - -typedef struct xfs_ioops { - xfs_ioinit_t xfs_ioinit; - xfs_bmapi_t xfs_bmapi_func; - xfs_bunmapi_t xfs_bunmapi_func; - xfs_bmap_eof_t xfs_bmap_eof_func; - xfs_iomap_write_direct_t xfs_iomap_write_direct; - xfs_iomap_write_delay_t xfs_iomap_write_delay; - xfs_iomap_write_allocate_t xfs_iomap_write_allocate; - xfs_iomap_write_unwritten_t xfs_iomap_write_unwritten; - xfs_lock_t xfs_ilock; - xfs_lck_map_shared_t xfs_lck_map_shared; - xfs_lock_demote_t xfs_ilock_demote; - xfs_lock_nowait_t xfs_ilock_nowait; - xfs_unlk_t xfs_unlock; - xfs_size_t xfs_size_func; - xfs_iodone_t xfs_iodone; - xfs_swap_extents_t 
xfs_swap_extents_func; -} xfs_ioops_t; - -#define XFS_IOINIT(mp, args, flags) \ - (*(mp)->m_io_ops.xfs_ioinit)(mp, args, flags) -#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist,delta) \ - (*(mp)->m_io_ops.xfs_bmapi_func) \ - (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist,delta) -#define XFS_BUNMAPI(mp, trans,io,bno,len,f,nexts,first,flist,delta,done) \ - (*(mp)->m_io_ops.xfs_bunmapi_func) \ - (trans,(io)->io_obj,bno,len,f,nexts,first,flist,delta,done) -#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \ - (*(mp)->m_io_ops.xfs_bmap_eof_func) \ - ((io)->io_obj, endoff, whichfork, eof) -#define XFS_IOMAP_WRITE_DIRECT(mp, io, offset, count, flags, mval, nmap, found)\ - (*(mp)->m_io_ops.xfs_iomap_write_direct) \ - ((io)->io_obj, offset, count, flags, mval, nmap, found) -#define XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, flags, mval, nmap) \ - (*(mp)->m_io_ops.xfs_iomap_write_delay) \ - ((io)->io_obj, offset, count, flags, mval, nmap) -#define XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count, mval, nmap) \ - (*(mp)->m_io_ops.xfs_iomap_write_allocate) \ - ((io)->io_obj, offset, count, mval, nmap) -#define XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count) \ - (*(mp)->m_io_ops.xfs_iomap_write_unwritten) \ - ((io)->io_obj, offset, count) -#define XFS_LCK_MAP_SHARED(mp, io) \ - (*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj) -#define XFS_ILOCK(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode) -#define XFS_ILOCK_NOWAIT(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode) -#define XFS_IUNLOCK(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode) -#define XFS_ILOCK_DEMOTE(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode) -#define XFS_SIZE(mp, io) \ - (*(mp)->m_io_ops.xfs_size_func)((io)->io_obj) -#define XFS_IODONE(mp) \ - (*(mp)->m_io_ops.xfs_iodone)(mp) -#define XFS_SWAP_EXTENTS(mp, io, tio, sxp) \ - (*(mp)->m_io_ops.xfs_swap_extents_func) \ - ((io)->io_obj, (tio)->io_obj, 
sxp) - #ifdef HAVE_PERCPU_SB /* @@ -326,14 +219,20 @@ extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); #define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) #endif +typedef struct xfs_ail { + xfs_ail_entry_t xa_ail; + uint xa_gen; + struct task_struct *xa_task; + xfs_lsn_t xa_target; +} xfs_ail_t; + typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ - AIL_LOCK_T m_ail_lock; /* fs AIL mutex */ - xfs_ail_entry_t m_ail; /* fs active log item list */ - uint m_ail_gen; /* fs AIL generation count */ + spinlock_t m_ail_lock; /* fs AIL mutex */ + xfs_ail_t m_ail; /* fs active log item list */ xfs_sb_t m_sb; /* copy of fs superblock */ - lock_t m_sb_lock; /* sb counter mutex */ + spinlock_t m_sb_lock; /* sb counter lock */ struct xfs_buf *m_sb_bp; /* buffer for superblock */ char *m_fsname; /* filesystem name */ int m_fsname_len; /* strlen of fs name */ @@ -342,7 +241,7 @@ typedef struct xfs_mount { int m_bsize; /* fs logical block size */ xfs_agnumber_t m_agfrotor; /* last ag where space found */ xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ - lock_t m_agirotor_lock;/* .. and lock protecting it */ + spinlock_t m_agirotor_lock;/* .. 
and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ struct xfs_inode *m_inodes; /* active inode list */ struct list_head m_del_inodes; /* inodes to reclaim */ @@ -423,7 +322,6 @@ typedef struct xfs_mount { * hash table */ struct xfs_dmops *m_dm_ops; /* vector of DMI ops */ struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ - struct xfs_ioops m_io_ops; /* vector of I/O ops */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ @@ -610,8 +508,6 @@ typedef struct xfs_mod_sb { #define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) -#define XFS_SB_LOCK(mp) mutex_spinlock(&(mp)->m_sb_lock) -#define XFS_SB_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_sb_lock,(s)) extern xfs_mount_t *xfs_mount_init(void); extern void xfs_mod_sb(xfs_trans_t *, __int64_t); @@ -646,7 +542,6 @@ extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; -extern struct xfs_ioops xfs_iocore_xfs; extern int xfs_init(void); extern void xfs_cleanup(void); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index e0b358c1c533..a0b2c0a2589a 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -225,10 +225,14 @@ _xfs_mru_cache_list_insert( * list need to be deleted. For each element this involves removing it from the * data store, removing it from the reap list, calling the client's free * function and deleting the element from the element zone. + * + * We get called holding the mru->lock, which we drop and then reacquire. + * Sparse need special help with this to tell it we know what we are doing. 
*/ STATIC void _xfs_mru_cache_clear_reap_list( - xfs_mru_cache_t *mru) + xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock) + { xfs_mru_cache_elem_t *elem, *next; struct list_head tmp; @@ -245,7 +249,7 @@ _xfs_mru_cache_clear_reap_list( */ list_move(&elem->list_node, &tmp); } - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); list_for_each_entry_safe(elem, next, &tmp, list_node) { @@ -259,7 +263,7 @@ _xfs_mru_cache_clear_reap_list( kmem_zone_free(xfs_mru_elem_zone, elem); } - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); } /* @@ -280,7 +284,7 @@ _xfs_mru_cache_reap( if (!mru || !mru->lists) return; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); next = _xfs_mru_cache_migrate(mru, jiffies); _xfs_mru_cache_clear_reap_list(mru); @@ -294,7 +298,7 @@ _xfs_mru_cache_reap( queue_delayed_work(xfs_mru_reap_wq, &mru->work, next); } - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); } int @@ -368,7 +372,7 @@ xfs_mru_cache_create( */ INIT_RADIX_TREE(&mru->store, GFP_ATOMIC); INIT_LIST_HEAD(&mru->reap_list); - spinlock_init(&mru->lock, "xfs_mru_cache"); + spin_lock_init(&mru->lock); INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap); mru->grp_time = grp_time; @@ -398,17 +402,17 @@ xfs_mru_cache_flush( if (!mru || !mru->lists) return; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); if (mru->queued) { - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); } _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time); _xfs_mru_cache_clear_reap_list(mru); - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); } void @@ -454,13 +458,13 @@ xfs_mru_cache_insert( elem->key = key; elem->value = value; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); radix_tree_insert(&mru->store, key, elem); radix_tree_preload_end(); _xfs_mru_cache_list_insert(mru, elem); - 
mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); return 0; } @@ -483,14 +487,14 @@ xfs_mru_cache_remove( if (!mru || !mru->lists) return NULL; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); elem = radix_tree_delete(&mru->store, key); if (elem) { value = elem->value; list_del(&elem->list_node); } - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); if (elem) kmem_zone_free(xfs_mru_elem_zone, elem); @@ -528,6 +532,10 @@ xfs_mru_cache_delete( * * If the element isn't found, this function returns NULL and the spinlock is * released. xfs_mru_cache_done() should NOT be called when this occurs. + * + * Because sparse isn't smart enough to know about conditional lock return + * status, we need to help it get it right by annotating the path that does + * not release the lock. */ void * xfs_mru_cache_lookup( @@ -540,14 +548,14 @@ xfs_mru_cache_lookup( if (!mru || !mru->lists) return NULL; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); elem = radix_tree_lookup(&mru->store, key); if (elem) { list_del(&elem->list_node); _xfs_mru_cache_list_insert(mru, elem); - } - else - mutex_spinunlock(&mru->lock, 0); + __release(mru_lock); /* help sparse not be stupid */ + } else + spin_unlock(&mru->lock); return elem ? elem->value : NULL; } @@ -571,10 +579,12 @@ xfs_mru_cache_peek( if (!mru || !mru->lists) return NULL; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); elem = radix_tree_lookup(&mru->store, key); if (!elem) - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); + else + __release(mru_lock); /* help sparse not be stupid */ return elem ? 
elem->value : NULL; } @@ -586,7 +596,7 @@ xfs_mru_cache_peek( */ void xfs_mru_cache_done( - xfs_mru_cache_t *mru) + xfs_mru_cache_t *mru) __releases(mru->lock) { - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); } diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 2ec1d8a27352..a294e58db8dd 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -49,18 +49,17 @@ xfs_mount_reset_sbqflags(xfs_mount_t *mp) { int error; xfs_trans_t *tp; - unsigned long s; mp->m_qflags = 0; /* * It is OK to look at sb_qflags here in mount path, - * without SB_LOCK. + * without m_sb_lock. */ if (mp->m_sb.sb_qflags == 0) return 0; - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = 0; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* * if the fs is readonly, let the incore superblock run diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 44ea0ba36476..7eb157a59f9e 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -39,6 +39,7 @@ #include "xfs_refcache.h" #include "xfs_utils.h" #include "xfs_trans_space.h" +#include "xfs_vnodeops.h" /* @@ -118,7 +119,7 @@ xfs_lock_for_rename( inum1 = ip1->i_ino; ASSERT(ip1); - ITRACE(ip1); + xfs_itrace_ref(ip1); /* * Unlock dp1 and lock dp2 if they are different. 
@@ -141,7 +142,7 @@ xfs_lock_for_rename( IRELE (ip1); return error; } else { - ITRACE(ip2); + xfs_itrace_ref(ip2); } /* @@ -247,8 +248,8 @@ xfs_rename( int src_namelen = VNAMELEN(src_vname); int target_namelen = VNAMELEN(target_vname); - vn_trace_entry(src_dp, "xfs_rename", (inst_t *)__return_address); - vn_trace_entry(xfs_vtoi(target_dir_vp), "xfs_rename", (inst_t *)__return_address); + xfs_itrace_entry(src_dp); + xfs_itrace_entry(xfs_vtoi(target_dir_vp)); /* * Find the XFS behavior descriptor for the target directory diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 47082c01872d..ca83ddf72af4 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -73,18 +73,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, */ /* - * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. - */ -STATIC int -xfs_lowbit32( - __uint32_t v) -{ - if (v) - return ffs(v) - 1; - return -1; -} - -/* * Allocate space to the bitmap or summary file, and zero it, for growfs. */ STATIC int /* error */ @@ -444,6 +432,7 @@ xfs_rtallocate_extent_near( } bbno = XFS_BITTOBLOCK(mp, bno); i = 0; + ASSERT(minlen != 0); log2len = xfs_highbit32(minlen); /* * Loop over all bitmap blocks (bbno + i is current block). @@ -612,6 +601,8 @@ xfs_rtallocate_extent_size( xfs_suminfo_t sum; /* summary information for extents */ ASSERT(minlen % prod == 0 && maxlen % prod == 0); + ASSERT(maxlen != 0); + /* * Loop over all the levels starting with maxlen. * At each level, look at all the bitmap blocks, to see if there @@ -669,6 +660,9 @@ xfs_rtallocate_extent_size( *rtblock = NULLRTBLOCK; return 0; } + ASSERT(minlen != 0); + ASSERT(maxlen != 0); + /* * Loop over sizes, from maxlen down to minlen. 
* This time, when we do the allocations, allow smaller ones @@ -1954,6 +1948,7 @@ xfs_growfs_rt( nsbp->sb_blocksize * nsbp->sb_rextsize); nsbp->sb_rextents = nsbp->sb_rblocks; do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + ASSERT(nsbp->sb_rextents != 0); nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; nrsumsize = diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 799c1f871263..8d8dcd215716 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -21,8 +21,6 @@ struct xfs_mount; struct xfs_trans; -#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) - /* Min and max rt extent sizes, specified in bytes */ #define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ #define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64KB */ diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index 49875e1d129f..f87db5344ce6 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -32,18 +32,10 @@ struct xfs_mount; static inline xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) { - return (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) ? \ + return (XFS_IS_REALTIME_INODE(ip) ? \ (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); } -#define XFS_FSB_TO_DB_IO(io,fsb) xfs_fsb_to_db_io(io,fsb) -static inline xfs_daddr_t -xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb) -{ - return (((io)->io_flags & XFS_IOCORE_RT) ? \ - XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \ - XFS_FSB_TO_DADDR((io)->io_mount, (fsb))); -} /* * Flags for xfs_free_eofblocks @@ -61,7 +53,7 @@ xfs_get_extsz_hint( { xfs_extlen_t extsz; - if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (unlikely(XFS_IS_REALTIME_INODE(ip))) { extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) ? 
ip->i_d.di_extsize : ip->i_mount->m_sb.sb_rextsize; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8878322ee793..140386434aa3 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -567,26 +567,26 @@ xfs_trans_apply_sb_deltas( */ if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) { if (tp->t_icount_delta) - be64_add(&sbp->sb_icount, tp->t_icount_delta); + be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); if (tp->t_ifree_delta) - be64_add(&sbp->sb_ifree, tp->t_ifree_delta); + be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta); if (tp->t_fdblocks_delta) - be64_add(&sbp->sb_fdblocks, tp->t_fdblocks_delta); + be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta); if (tp->t_res_fdblocks_delta) - be64_add(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); + be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); } if (tp->t_frextents_delta) - be64_add(&sbp->sb_frextents, tp->t_frextents_delta); + be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta); if (tp->t_res_frextents_delta) - be64_add(&sbp->sb_frextents, tp->t_res_frextents_delta); + be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta); if (tp->t_dblocks_delta) { - be64_add(&sbp->sb_dblocks, tp->t_dblocks_delta); + be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); whole = 1; } if (tp->t_agcount_delta) { - be32_add(&sbp->sb_agcount, tp->t_agcount_delta); + be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta); whole = 1; } if (tp->t_imaxpct_delta) { @@ -594,19 +594,19 @@ xfs_trans_apply_sb_deltas( whole = 1; } if (tp->t_rextsize_delta) { - be32_add(&sbp->sb_rextsize, tp->t_rextsize_delta); + be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); whole = 1; } if (tp->t_rbmblocks_delta) { - be32_add(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); + be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); whole = 1; } if (tp->t_rblocks_delta) { - be64_add(&sbp->sb_rblocks, tp->t_rblocks_delta); + be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta); whole = 1; } if (tp->t_rextents_delta) { - 
be64_add(&sbp->sb_rextents, tp->t_rextents_delta); + be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta); whole = 1; } if (tp->t_rextslog_delta) { @@ -1322,7 +1322,6 @@ xfs_trans_chunk_committed( xfs_lsn_t item_lsn; struct xfs_mount *mp; int i; - SPLDECL(s); lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { @@ -1363,7 +1362,7 @@ xfs_trans_chunk_committed( * the test below. */ mp = lip->li_mountp; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { /* * This will set the item's lsn to item_lsn @@ -1372,9 +1371,9 @@ xfs_trans_chunk_committed( * * xfs_trans_update_ail() drops the AIL lock. */ - xfs_trans_update_ail(mp, lip, item_lsn, s); + xfs_trans_update_ail(mp, lip, item_lsn); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0e26e729023e..7f40628d85c7 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -992,8 +992,9 @@ int _xfs_trans_commit(xfs_trans_t *, int *); #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); -void xfs_trans_ail_init(struct xfs_mount *); -xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); +int xfs_trans_ail_init(struct xfs_mount *); +void xfs_trans_ail_destroy(struct xfs_mount *); +void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); void xfs_trans_unlocked_item(struct xfs_mount *, xfs_log_item_t *); @@ -1001,6 +1002,8 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx); +extern kmem_zone_t *xfs_trans_zone; + #endif /* __KERNEL__ */ #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 5b2ff59f19cf..4d6330eddc8d 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -34,9 +34,9 @@ STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *); STATIC xfs_log_item_t * 
xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *); #ifdef DEBUG -STATIC void xfs_ail_check(xfs_ail_entry_t *); +STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *); #else -#define xfs_ail_check(a) +#define xfs_ail_check(a,l) #endif /* DEBUG */ @@ -55,16 +55,15 @@ xfs_trans_tail_ail( { xfs_lsn_t lsn; xfs_log_item_t *lip; - SPLDECL(s); - AIL_LOCK(mp,s); - lip = xfs_ail_min(&(mp->m_ail)); + spin_lock(&mp->m_ail_lock); + lip = xfs_ail_min(&(mp->m_ail.xa_ail)); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { lsn = lip->li_lsn; } - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); return lsn; } @@ -72,120 +71,185 @@ xfs_trans_tail_ail( /* * xfs_trans_push_ail * - * This routine is called to move the tail of the AIL - * forward. It does this by trying to flush items in the AIL - * whose lsns are below the given threshold_lsn. + * This routine is called to move the tail of the AIL forward. It does this by + * trying to flush items in the AIL whose lsns are below the given + * threshold_lsn. * - * The routine returns the lsn of the tail of the log. + * The push is run asynchronously in a separate thread, so we return the tail + * of the log right now instead of the tail after the push. This means we will + * either continue right away, or we will sleep waiting on the async thread to + * do its work. + * + * We do this unlocked - we only need to know whether there is anything in the + * AIL at the time we are called. We don't need to access the contents of + * any of the objects, so the lock is not needed.
*/ -xfs_lsn_t +void xfs_trans_push_ail( xfs_mount_t *mp, xfs_lsn_t threshold_lsn) { - xfs_lsn_t lsn; xfs_log_item_t *lip; - int gen; - int restarts; - int lock_result; - int flush_log; - SPLDECL(s); -#define XFS_TRANS_PUSH_AIL_RESTARTS 1000 + lip = xfs_ail_min(&mp->m_ail.xa_ail); + if (lip && !XFS_FORCED_SHUTDOWN(mp)) { + if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) + xfsaild_wakeup(mp, threshold_lsn); + } +} + +/* + * Return the item in the AIL with the current lsn. + * Return the current tree generation number for use + * in calls to xfs_trans_next_ail(). + */ +STATIC xfs_log_item_t * +xfs_trans_first_push_ail( + xfs_mount_t *mp, + int *gen, + xfs_lsn_t lsn) +{ + xfs_log_item_t *lip; + + lip = xfs_ail_min(&(mp->m_ail.xa_ail)); + *gen = (int)mp->m_ail.xa_gen; + if (lsn == 0) + return lip; + + while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0)) + lip = lip->li_ail.ail_forw; - AIL_LOCK(mp,s); - lip = xfs_trans_first_ail(mp, &gen); - if (lip == NULL || XFS_FORCED_SHUTDOWN(mp)) { + return lip; +} + +/* + * Function that does the work of pushing on the AIL + */ +long +xfsaild_push( + xfs_mount_t *mp, + xfs_lsn_t *last_lsn) +{ + long tout = 1000; /* milliseconds */ + xfs_lsn_t last_pushed_lsn = *last_lsn; + xfs_lsn_t target = mp->m_ail.xa_target; + xfs_lsn_t lsn; + xfs_log_item_t *lip; + int gen; + int restarts; + int flush_log, count, stuck; + +#define XFS_TRANS_PUSH_AIL_RESTARTS 10 + + spin_lock(&mp->m_ail_lock); + lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn); + if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* - * Just return if the AIL is empty. + * AIL is empty or our push has reached the end. */ - AIL_UNLOCK(mp, s); - return (xfs_lsn_t)0; + spin_unlock(&mp->m_ail_lock); + last_pushed_lsn = 0; + goto out; } XFS_STATS_INC(xs_push_ail); /* * While the item we are looking at is below the given threshold - * try to flush it out. Make sure to limit the number of times - * we allow xfs_trans_next_ail() to restart scanning from the - * beginning of the list. 
We'd like not to stop until we've at least + * try to flush it out. We'd like not to stop until we've at least * tried to push on everything in the AIL with an LSN less than - * the given threshold. However, we may give up before that if - * we realize that we've been holding the AIL_LOCK for 'too long', - * blocking interrupts. Currently, too long is < 500us roughly. + * the given threshold. + * + * However, we will stop after a certain number of pushes and wait + * for a reduced timeout to fire before pushing further. This + * prevents us from spinning when we can't do anything or there is + * lots of contention on the AIL lists. */ - flush_log = 0; - restarts = 0; - while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) && - (XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) { + tout = 10; + lsn = lip->li_lsn; + flush_log = stuck = count = restarts = 0; + while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { + int lock_result; /* - * If we can lock the item without sleeping, unlock - * the AIL lock and flush the item. Then re-grab the - * AIL lock so we can look for the next item on the - * AIL. Since we unlock the AIL while we flush the - * item, the next routine may start over again at the - * the beginning of the list if anything has changed. - * That is what the generation count is for. + * If we can lock the item without sleeping, unlock the AIL + * lock and flush the item. Then re-grab the AIL lock so we + * can look for the next item on the AIL. List changes are + * handled by the AIL lookup functions internally * - * If we can't lock the item, either its holder will flush - * it or it is already being flushed or it is being relogged. - * In any of these case it is being taken care of and we - * can just skip to the next item in the list. + * If we can't lock the item, either its holder will flush it + * or it is already being flushed or it is being relogged. In + * any of these cases it is being taken care of and we can just + * skip to the next item in the list.
*/ lock_result = IOP_TRYLOCK(lip); + spin_unlock(&mp->m_ail_lock); switch (lock_result) { - case XFS_ITEM_SUCCESS: - AIL_UNLOCK(mp, s); + case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); IOP_PUSH(lip); - AIL_LOCK(mp,s); + last_pushed_lsn = lsn; break; - case XFS_ITEM_PUSHBUF: - AIL_UNLOCK(mp, s); + case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); -#ifdef XFSRACEDEBUG - delay_for_intr(); - delay(300); -#endif - ASSERT(lip->li_ops->iop_pushbuf); - ASSERT(lip); IOP_PUSHBUF(lip); - AIL_LOCK(mp,s); + last_pushed_lsn = lsn; break; - case XFS_ITEM_PINNED: + case XFS_ITEM_PINNED: XFS_STATS_INC(xs_push_ail_pinned); + stuck++; flush_log = 1; break; - case XFS_ITEM_LOCKED: + case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); + last_pushed_lsn = lsn; + stuck++; break; - case XFS_ITEM_FLUSHING: + case XFS_ITEM_FLUSHING: XFS_STATS_INC(xs_push_ail_flushing); + last_pushed_lsn = lsn; + stuck++; break; - default: + default: ASSERT(0); break; } - lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); - if (lip == NULL) { + spin_lock(&mp->m_ail_lock); + /* should we bother continuing? */ + if (XFS_FORCED_SHUTDOWN(mp)) + break; + ASSERT(mp->m_log); + + count++; + + /* + * Are there too many items we can't do anything with? + * If we are skipping too many items because we can't flush + * them or they are already being flushed, we back off and + * give them time to complete whatever operation is being + * done. i.e. remove pressure from the AIL while we can't make + * progress so traversals don't slow down further inserts and + * removals to/from the AIL. + * + * The value of 100 is an arbitrary magic number based on + * observation. + */ + if (stuck > 100) break; - } - if (XFS_FORCED_SHUTDOWN(mp)) { - /* - * Just return if we shut down during the last try.
- */ - AIL_UNLOCK(mp, s); - return (xfs_lsn_t)0; - } + lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); + if (lip == NULL) + break; + if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS) + break; + lsn = lip->li_lsn; } + spin_unlock(&mp->m_ail_lock); if (flush_log) { /* @@ -193,22 +257,35 @@ xfs_trans_push_ail( * push out the log so it will become unpinned and * move forward in the AIL. */ - AIL_UNLOCK(mp, s); XFS_STATS_INC(xs_push_ail_flush); xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - AIL_LOCK(mp, s); } - lip = xfs_ail_min(&(mp->m_ail)); - if (lip == NULL) { - lsn = (xfs_lsn_t)0; - } else { - lsn = lip->li_lsn; + /* + * We reached the target so wait a bit longer for I/O to complete and + * remove pushed items from the AIL before we start the next scan from + * the start of the AIL. + */ + if ((XFS_LSN_CMP(lsn, target) >= 0)) { + tout += 20; + last_pushed_lsn = 0; + } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || + (count && ((stuck * 100) / count > 90))) { + /* + * Either there is a lot of contention on the AIL or we + * are stuck due to operations in progress. "Stuck" in this + * case is defined as >90% of the items we tried to push + * were stuck. + * + * Backoff a bit more to allow some I/O to complete before + * continuing from where we were. + */ + tout += 10; } - - AIL_UNLOCK(mp, s); - return lsn; -} /* xfs_trans_push_ail */ +out: + *last_lsn = last_pushed_lsn; + return tout; +} /* xfsaild_push */ /* @@ -249,7 +326,7 @@ xfs_trans_unlocked_item( * the call to xfs_log_move_tail() doesn't do anything if there's * not enough free space to wake people up so we're safe calling it. */ - min_lip = xfs_ail_min(&mp->m_ail); + min_lip = xfs_ail_min(&mp->m_ail.xa_ail); if (min_lip == lip) xfs_log_move_tail(mp, 1); @@ -269,21 +346,19 @@ xfs_trans_unlocked_item( * has changed. * * This function must be called with the AIL lock held. The lock - * is dropped before returning, so the caller must pass in the - * cookie returned by AIL_LOCK. 
+ * is dropped before returning. */ void xfs_trans_update_ail( xfs_mount_t *mp, xfs_log_item_t *lip, - xfs_lsn_t lsn, - unsigned long s) __releases(mp->m_ail_lock) + xfs_lsn_t lsn) __releases(mp->m_ail_lock) { xfs_ail_entry_t *ailp; xfs_log_item_t *dlip=NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ - ailp = &(mp->m_ail); + ailp = &(mp->m_ail.xa_ail); mlip = xfs_ail_min(ailp); if (lip->li_flags & XFS_LI_IN_AIL) { @@ -296,14 +371,14 @@ xfs_trans_update_ail( lip->li_lsn = lsn; xfs_ail_insert(ailp, lip); - mp->m_ail_gen++; + mp->m_ail.xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&(mp->m_ail)); - AIL_UNLOCK(mp, s); + mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); + spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, mlip->li_lsn); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } @@ -322,21 +397,19 @@ xfs_trans_update_ail( * has changed. * * This function must be called with the AIL lock held. The lock - * is dropped before returning, so the caller must pass in the - * cookie returned by AIL_LOCK. + * is dropped before returning. */ void xfs_trans_delete_ail( xfs_mount_t *mp, - xfs_log_item_t *lip, - unsigned long s) __releases(mp->m_ail_lock) + xfs_log_item_t *lip) __releases(mp->m_ail_lock) { xfs_ail_entry_t *ailp; xfs_log_item_t *dlip; xfs_log_item_t *mlip; if (lip->li_flags & XFS_LI_IN_AIL) { - ailp = &(mp->m_ail); + ailp = &(mp->m_ail.xa_ail); mlip = xfs_ail_min(ailp); dlip = xfs_ail_delete(ailp, lip); ASSERT(dlip == lip); @@ -344,14 +417,14 @@ xfs_trans_delete_ail( lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - mp->m_ail_gen++; + mp->m_ail.xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&(mp->m_ail)); - AIL_UNLOCK(mp, s); + mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); + spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } else { @@ -360,12 +433,12 @@ xfs_trans_delete_ail( * serious trouble if we get to this stage. 
*/ if (XFS_FORCED_SHUTDOWN(mp)) - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); else { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, "%s: attempting to delete a log item that is not in the AIL", __FUNCTION__); - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } } @@ -385,10 +458,10 @@ xfs_trans_first_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&(mp->m_ail)); - *gen = (int)mp->m_ail_gen; + lip = xfs_ail_min(&(mp->m_ail.xa_ail)); + *gen = (int)mp->m_ail.xa_gen; - return (lip); + return lip; } /* @@ -408,11 +481,11 @@ xfs_trans_next_ail( xfs_log_item_t *nlip; ASSERT(mp && lip && gen); - if (mp->m_ail_gen == *gen) { - nlip = xfs_ail_next(&(mp->m_ail), lip); + if (mp->m_ail.xa_gen == *gen) { + nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip); } else { - nlip = xfs_ail_min(&(mp->m_ail)); - *gen = (int)mp->m_ail_gen; + nlip = xfs_ail_min(&(mp->m_ail).xa_ail); + *gen = (int)mp->m_ail.xa_gen; if (restarts != NULL) { XFS_STATS_INC(xs_push_ail_restarts); (*restarts)++; @@ -437,12 +510,20 @@ xfs_trans_next_ail( /* * Initialize the doubly linked list to point only to itself. 
*/ -void +int xfs_trans_ail_init( xfs_mount_t *mp) { - mp->m_ail.ail_forw = (xfs_log_item_t*)&(mp->m_ail); - mp->m_ail.ail_back = (xfs_log_item_t*)&(mp->m_ail); + mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail; + mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail; + return xfsaild_start(mp); +} + +void +xfs_trans_ail_destroy( + xfs_mount_t *mp) +{ + xfsaild_stop(mp); } /* @@ -482,7 +563,7 @@ xfs_ail_insert( next_lip->li_ail.ail_forw = lip; lip->li_ail.ail_forw->li_ail.ail_back = lip; - xfs_ail_check(base); + xfs_ail_check(base, lip); return; } @@ -496,12 +577,12 @@ xfs_ail_delete( xfs_log_item_t *lip) /* ARGSUSED */ { + xfs_ail_check(base, lip); lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back; lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw; lip->li_ail.ail_forw = NULL; lip->li_ail.ail_back = NULL; - xfs_ail_check(base); return lip; } @@ -545,13 +626,13 @@ xfs_ail_next( */ STATIC void xfs_ail_check( - xfs_ail_entry_t *base) + xfs_ail_entry_t *base, + xfs_log_item_t *lip) { - xfs_log_item_t *lip; xfs_log_item_t *prev_lip; - lip = base->ail_forw; - if (lip == (xfs_log_item_t*)base) { + prev_lip = base->ail_forw; + if (prev_lip == (xfs_log_item_t*)base) { /* * Make sure the pointers are correct when the list * is empty. @@ -561,9 +642,27 @@ xfs_ail_check( } /* + * Check the next and previous entries are valid. + */ + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = lip->li_ail.ail_back; + if (prev_lip != (xfs_log_item_t*)base) { + ASSERT(prev_lip->li_ail.ail_forw == lip); + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + } + prev_lip = lip->li_ail.ail_forw; + if (prev_lip != (xfs_log_item_t*)base) { + ASSERT(prev_lip->li_ail.ail_back == lip); + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); + } + + +#ifdef XFS_TRANS_DEBUG + /* * Walk the list checking forward and backward pointers, * lsn ordering, and that every entry has the XFS_LI_IN_AIL - * flag set. + * flag set. 
This is really expensive, so only do it when + * specifically debugging the transaction subsystem. */ prev_lip = (xfs_log_item_t*)base; while (lip != (xfs_log_item_t*)base) { @@ -578,5 +677,6 @@ xfs_ail_check( } ASSERT(lip == (xfs_log_item_t*)base); ASSERT(base->ail_back == prev_lip); +#endif /* XFS_TRANS_DEBUG */ } #endif /* DEBUG */ diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index 2912aac07c7b..66a09f0d894b 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -21,6 +21,7 @@ #include "xfs_log.h" #include "xfs_inum.h" #include "xfs_trans.h" +#include "xfs_trans_priv.h" STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, int, int, xfs_lsn_t); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 447ac4308c91..3c748c456ed4 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -47,15 +47,22 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, * From xfs_trans_ail.c */ void xfs_trans_update_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, xfs_lsn_t lsn, - unsigned long s) + struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(mp->m_ail_lock); void xfs_trans_delete_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, unsigned long s) + struct xfs_log_item *lip) __releases(mp->m_ail_lock); struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, struct xfs_log_item *, int *, int *); +/* + * AIL push thread support + */ +long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); +void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); +int xfsaild_start(struct xfs_mount *); +void xfsaild_stop(struct xfs_mount *); + #endif /* __XFS_TRANS_PRIV_H__ */ diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 673b405eaa31..45d740df53b7 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -73,7 +73,7 @@ xfs_dir_lookup_int( { int error; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); 
error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); if (!error) { @@ -302,6 +302,7 @@ xfs_droplink( ASSERT (ip->i_d.di_nlink > 0); ip->i_d.di_nlink--; + drop_nlink(ip->i_vnode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = 0; @@ -330,7 +331,6 @@ xfs_bump_ino_vers2( xfs_inode_t *ip) { xfs_mount_t *mp; - unsigned long s; ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); @@ -340,13 +340,13 @@ xfs_bump_ino_vers2( memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); mp = tp->t_mountp; if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { XFS_SB_VERSION_ADDNLINK(&mp->m_sb); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, XFS_SB_VERSIONNUM); } else { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } } /* Caller must log the inode */ @@ -366,6 +366,7 @@ xfs_bumplink( ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; + inc_nlink(ip->i_vnode); if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && (ip->i_d.di_nlink > XFS_MAXLINK_1)) { /* diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index a00b26d8840e..f857fcccb723 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -20,8 +20,6 @@ #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) -#define ITRACE(ip) vn_trace_ref(ip, __FILE__, __LINE__, \ - (inst_t *)__return_address) extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index a1544597bcd3..413587f02155 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -58,17 +58,12 @@ #include "xfs_vfsops.h" -int +int __init xfs_init(void) { - extern kmem_zone_t *xfs_bmap_free_item_zone; - extern kmem_zone_t *xfs_btree_cur_zone; - extern kmem_zone_t *xfs_trans_zone; - extern kmem_zone_t *xfs_buf_item_zone; - 
extern kmem_zone_t *xfs_dabuf_zone; #ifdef XFS_DABUF_DEBUG - extern lock_t xfs_dabuf_global_lock; - spinlock_init(&xfs_dabuf_global_lock, "xfsda"); + extern spinlock_t xfs_dabuf_global_lock; + spin_lock_init(&xfs_dabuf_global_lock); #endif /* @@ -152,18 +147,12 @@ xfs_init(void) return 0; } -void +void __exit xfs_cleanup(void) { - extern kmem_zone_t *xfs_bmap_free_item_zone; - extern kmem_zone_t *xfs_btree_cur_zone; extern kmem_zone_t *xfs_inode_zone; - extern kmem_zone_t *xfs_trans_zone; - extern kmem_zone_t *xfs_da_state_zone; - extern kmem_zone_t *xfs_dabuf_zone; extern kmem_zone_t *xfs_efd_zone; extern kmem_zone_t *xfs_efi_zone; - extern kmem_zone_t *xfs_buf_item_zone; extern kmem_zone_t *xfs_icluster_zone; xfs_cleanup_procfs(); @@ -449,8 +438,6 @@ xfs_mount( if (error) return error; - mp->m_io_ops = xfs_iocore_xfs; - if (args->flags & XFSMNT_QUIET) flags |= XFS_MFSI_QUIET; @@ -544,7 +531,7 @@ xfs_mount( if ((error = xfs_filestream_mount(mp))) goto error2; - error = XFS_IOINIT(mp, args, flags); + error = xfs_mountfs(mp, flags); if (error) goto error2; @@ -694,7 +681,7 @@ xfs_quiesce_fs( * care of the metadata. New transactions are already blocked, so we need to * wait for any remaining transactions to drain out before proceding. */ -STATIC void +void xfs_attr_quiesce( xfs_mount_t *mp) { @@ -821,80 +808,6 @@ fscorrupt_out2: } /* - * xfs_root extracts the root vnode from a vfs. - * - * vfsp -- the vfs struct for the desired file system - * vpp -- address of the caller's vnode pointer which should be - * set to the desired fs root vnode - */ -int -xfs_root( - xfs_mount_t *mp, - bhv_vnode_t **vpp) -{ - bhv_vnode_t *vp; - - vp = XFS_ITOV(mp->m_rootip); - VN_HOLD(vp); - *vpp = vp; - return 0; -} - -/* - * xfs_statvfs - * - * Fill in the statvfs structure for the given file system. We use - * the superblock lock in the mount structure to ensure a consistent - * snapshot of the counters returned. 
- */ -int -xfs_statvfs( - xfs_mount_t *mp, - bhv_statvfs_t *statp, - bhv_vnode_t *vp) -{ - __uint64_t fakeinos; - xfs_extlen_t lsize; - xfs_sb_t *sbp; - unsigned long s; - - sbp = &(mp->m_sb); - - statp->f_type = XFS_SB_MAGIC; - - xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); - s = XFS_SB_LOCK(mp); - statp->f_bsize = sbp->sb_blocksize; - lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; - statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = - sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); - fakeinos = statp->f_bfree << sbp->sb_inopblog; -#if XFS_BIG_INUMS - fakeinos += mp->m_inoadd; -#endif - statp->f_files = - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) -#if XFS_BIG_INUMS - if (!mp->m_inoadd) -#endif - statp->f_files = min_t(typeof(statp->f_files), - statp->f_files, - mp->m_maxicount); - statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); - XFS_SB_UNLOCK(mp, s); - - xfs_statvfs_fsid(statp, mp); - statp->f_namelen = MAXNAMELEN - 1; - - if (vp) - XFS_QM_DQSTATVFS(xfs_vtoi(vp), statp); - return 0; -} - - -/* * xfs_sync flushes any pending I/O to file system vfsp. 
* * This routine is called by vfs_sync() to make sure that things make it @@ -981,8 +894,6 @@ xfs_sync_inodes( int *bypassed) { xfs_inode_t *ip = NULL; - xfs_inode_t *ip_next; - xfs_buf_t *bp; bhv_vnode_t *vp = NULL; int error; int last_error; @@ -992,7 +903,6 @@ xfs_sync_inodes( boolean_t mount_locked; boolean_t vnode_refed; int preempt; - xfs_dinode_t *dip; xfs_iptr_t *ipointer; #ifdef DEBUG boolean_t ipointer_in = B_FALSE; @@ -1045,6 +955,8 @@ xfs_sync_inodes( #define XFS_PREEMPT_MASK 0x7f + ASSERT(!(flags & SYNC_BDFLUSH)); + if (bypassed) *bypassed = 0; if (mp->m_flags & XFS_MOUNT_RDONLY) @@ -1057,7 +969,7 @@ xfs_sync_inodes( ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); fflag = XFS_B_ASYNC; /* default is don't wait */ - if (flags & (SYNC_BDFLUSH | SYNC_DELWRI)) + if (flags & SYNC_DELWRI) fflag = XFS_B_DELWRI; if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ @@ -1147,24 +1059,6 @@ xfs_sync_inodes( } /* - * If this is just vfs_sync() or pflushd() calling - * then we can skip inodes for which it looks like - * there is nothing to do. Since we don't have the - * inode locked this is racy, but these are periodic - * calls so it doesn't matter. For the others we want - * to know for sure, so we at least try to lock them. - */ - if (flags & SYNC_BDFLUSH) { - if (((ip->i_itemp == NULL) || - !(ip->i_itemp->ili_format.ilf_fields & - XFS_ILOG_ALL)) && - (ip->i_update_core == 0)) { - ip = ip->i_mnext; - continue; - } - } - - /* * Try to lock without sleeping. We're out of order with * the inode list lock here, so if we fail we need to drop * the mount lock and try again. If we're called from @@ -1181,7 +1075,7 @@ xfs_sync_inodes( * it. 
*/ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { + if (vp == NULL) { ip = ip->i_mnext; continue; } @@ -1242,160 +1136,27 @@ xfs_sync_inodes( xfs_ilock(ip, XFS_ILOCK_SHARED); } - if (flags & SYNC_BDFLUSH) { - if ((flags & SYNC_ATTR) && - ((ip->i_update_core) || - ((ip->i_itemp != NULL) && - (ip->i_itemp->ili_format.ilf_fields != 0)))) { - - /* Insert marker and drop lock if not already - * done. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - - /* - * We don't want the periodic flushing of the - * inodes by vfs_sync() to interfere with - * I/O to the file, especially read I/O - * where it is only the access time stamp - * that is being flushed out. To prevent - * long periods where we have both inode - * locks held shared here while reading the - * inode's buffer in from disk, we drop the - * inode lock while reading in the inode - * buffer. We have to release the buffer - * and reacquire the inode lock so that they - * are acquired in the proper order (inode - * locks first). The buffer will go at the - * end of the lru chain, though, so we can - * expect it to still be there when we go - * for it again in xfs_iflush(). - */ - if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - error = xfs_itobp(mp, NULL, ip, - &dip, &bp, 0, 0); - if (!error) { - xfs_buf_relse(bp); - } else { - /* Bailing out, remove the - * marker and free it. - */ - XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - XFS_MOUNT_IUNLOCK(mp); - - ASSERT(!(lock_flags & - XFS_IOLOCK_SHARED)); - - kmem_free(ipointer, - sizeof(xfs_iptr_t)); - return (0); - } - - /* - * Since we dropped the inode lock, - * the inode may have been reclaimed. - * Therefore, we reacquire the mount - * lock and check to see if we were the - * inode reclaimed. If this happened - * then the ipointer marker will no - * longer point back at us. 
In this - * case, move ip along to the inode - * after the marker, remove the marker - * and continue. - */ - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - - if (ip != ipointer->ip_mprev) { - IPOINTER_REMOVE(ip, mp); - - ASSERT(!vnode_refed); - ASSERT(!(lock_flags & - XFS_IOLOCK_SHARED)); - continue; - } - - ASSERT(ip->i_mount == mp); - - if (xfs_ilock_nowait(ip, - XFS_ILOCK_SHARED) == 0) { - ASSERT(ip->i_mount == mp); - /* - * We failed to reacquire - * the inode lock without - * sleeping, so just skip - * the inode for now. We - * clear the ILOCK bit from - * the lock_flags so that we - * won't try to drop a lock - * we don't hold below. - */ - lock_flags &= ~XFS_ILOCK_SHARED; - IPOINTER_REMOVE(ip_next, mp); - } else if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - ASSERT(ip->i_mount == mp); - /* - * Since this is vfs_sync() - * calling we only flush the - * inode out if we can lock - * it without sleeping and - * it is not pinned. Drop - * the mount lock here so - * that we don't hold it for - * too long. We already have - * a marker in the list here. - */ - XFS_MOUNT_IUNLOCK(mp); - mount_locked = B_FALSE; - error = xfs_iflush(ip, - XFS_IFLUSH_DELWRI); - } else { - ASSERT(ip->i_mount == mp); - IPOINTER_REMOVE(ip_next, mp); - } - } - - } + if ((flags & SYNC_ATTR) && + (ip->i_update_core || + (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { + if (mount_locked) + IPOINTER_INSERT(ip, mp); - } else { - if ((flags & SYNC_ATTR) && - ((ip->i_update_core) || - ((ip->i_itemp != NULL) && - (ip->i_itemp->ili_format.ilf_fields != 0)))) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - if (flags & SYNC_WAIT) { - xfs_iflock(ip); - error = xfs_iflush(ip, - XFS_IFLUSH_SYNC); - } else { - /* - * If we can't acquire the flush - * lock, then the inode is already - * being flushed so don't bother - * waiting. 
If we can lock it then - * do a delwri flush so we can - * combine multiple inode flushes - * in each disk write. - */ - if (xfs_iflock_nowait(ip)) { - error = xfs_iflush(ip, - XFS_IFLUSH_DELWRI); - } - else if (bypassed) - (*bypassed)++; - } + /* + * If we can't acquire the flush lock, then the inode + * is already being flushed so don't bother waiting. + * + * If we can lock it then do a delwri flush so we can + * combine multiple inode flushes in each disk write. + */ + } else if (xfs_iflock_nowait(ip)) { + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + } else if (bypassed) { + (*bypassed)++; } } @@ -1627,499 +1388,3 @@ xfs_syncsub( return XFS_ERROR(last_error); } - -/* - * xfs_vget - called by DMAPI and NFSD to get vnode from file handle - */ -int -xfs_vget( - xfs_mount_t *mp, - bhv_vnode_t **vpp, - xfs_fid_t *xfid) -{ - xfs_inode_t *ip; - int error; - xfs_ino_t ino; - unsigned int igen; - - /* - * Invalid. Since handles can be created in user space and passed in - * via gethandle(), this is not cause for a panic. - */ - if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len)) - return XFS_ERROR(EINVAL); - - ino = xfid->fid_ino; - igen = xfid->fid_gen; - - /* - * NFS can sometimes send requests for ino 0. Fail them gracefully. 
- */ - if (ino == 0) - return XFS_ERROR(ESTALE); - - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); - if (error) { - *vpp = NULL; - return error; - } - - if (ip == NULL) { - *vpp = NULL; - return XFS_ERROR(EIO); - } - - if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { - xfs_iput_new(ip, XFS_ILOCK_SHARED); - *vpp = NULL; - return XFS_ERROR(ENOENT); - } - - *vpp = XFS_ITOV(ip); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return 0; -} - - -#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ -#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ -#define MNTOPT_LOGDEV "logdev" /* log device */ -#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ -#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ -#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ -#define MNTOPT_INO64 "ino64" /* force inodes into 64-bit range */ -#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ -#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ -#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ -#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ -#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ -#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ -#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ -#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ -#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ -#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ -#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ -#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ -#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and - * unwritten extent conversion */ -#define MNTOPT_NOBARRIER "nobarrier" /* .. 
disable */ -#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ -#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ -#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ -#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ -#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ -#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes - * in stat(). */ -#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ -#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ -#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ -#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ -#define MNTOPT_NOQUOTA "noquota" /* no quotas */ -#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ -#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ -#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ -#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ -#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ -#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ -#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ -#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ -#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ -#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ - -STATIC unsigned long -suffix_strtoul(char *s, char **endp, unsigned int base) -{ - int last, shift_left_factor = 0; - char *value = s; - - last = strlen(value) - 1; - if (value[last] == 'K' || value[last] == 'k') { - shift_left_factor = 10; - value[last] = '\0'; - } - if (value[last] == 'M' || value[last] == 'm') { - shift_left_factor = 20; - value[last] 
= '\0'; - } - if (value[last] == 'G' || value[last] == 'g') { - shift_left_factor = 30; - value[last] = '\0'; - } - - return simple_strtoul((const char *)s, endp, base) << shift_left_factor; -} - -int -xfs_parseargs( - struct xfs_mount *mp, - char *options, - struct xfs_mount_args *args, - int update) -{ - char *this_char, *value, *eov; - int dsunit, dswidth, vol_dsunit, vol_dswidth; - int iosize; - int ikeep = 0; - - args->flags |= XFSMNT_BARRIER; - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; - - if (!options) - goto done; - - iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; - - while ((this_char = strsep(&options, ",")) != NULL) { - if (!*this_char) - continue; - if ((value = strchr(this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, MNTOPT_LOGBUFS)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - args->logbufs = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - args->logbufsize = suffix_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - strncpy(args->logname, value, MAXNAMELEN); - } else if (!strcmp(this_char, MNTOPT_MTPT)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - strncpy(args->mtpt, value, MAXNAMELEN); - } else if (!strcmp(this_char, MNTOPT_RTDEV)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - strncpy(args->rtname, value, MAXNAMELEN); - } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; 
- } - iosize = simple_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = (uint8_t) iosize; - } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - iosize = suffix_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_GRPID) || - !strcmp(this_char, MNTOPT_BSDGROUPS)) { - mp->m_flags |= XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_NOGRPID) || - !strcmp(this_char, MNTOPT_SYSVGROUPS)) { - mp->m_flags &= ~XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - args->flags |= XFSMNT_WSYNC; - } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - args->flags |= XFSMNT_OSYNCISOSYNC; - } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - args->flags |= XFSMNT_NORECOVERY; - } else if (!strcmp(this_char, MNTOPT_INO64)) { - args->flags |= XFSMNT_INO64; -#if !XFS_BIG_INUMS - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", - this_char); - return EINVAL; -#endif - } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - args->flags |= XFSMNT_NOALIGN; - } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - args->flags |= XFSMNT_SWALLOC; - } else if (!strcmp(this_char, MNTOPT_SUNIT)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - dsunit = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - dswidth = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - args->flags &= ~XFSMNT_32BITINODES; -#if !XFS_BIG_INUMS - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", - this_char); - return EINVAL; -#endif - } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - 
args->flags |= XFSMNT_NOUUID; - } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - args->flags |= XFSMNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - args->flags &= ~XFSMNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - ikeep = 1; - args->flags &= ~XFSMNT_IDELETE; - } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { - args->flags |= XFSMNT_IDELETE; - } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - args->flags |= XFSMNT_ATTR2; - } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - args->flags &= ~XFSMNT_ATTR2; - } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - args->flags2 |= XFSMNT2_FILESTREAMS; - } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); - args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); - } else if (!strcmp(this_char, MNTOPT_QUOTA) || - !strcmp(this_char, MNTOPT_UQUOTA) || - !strcmp(this_char, MNTOPT_USRQUOTA)) { - args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || - !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - args->flags |= XFSMNT_UQUOTA; - args->flags &= ~XFSMNT_UQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_PQUOTA) || - !strcmp(this_char, MNTOPT_PRJQUOTA)) { - args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - args->flags |= XFSMNT_PQUOTA; - args->flags &= ~XFSMNT_PQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_GQUOTA) || - !strcmp(this_char, MNTOPT_GRPQUOTA)) { - args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - args->flags |= XFSMNT_GQUOTA; - args->flags &= ~XFSMNT_GQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - args->flags |= XFSMNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_XDSM)) { - args->flags |= 
XFSMNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_DMI)) { - args->flags |= XFSMNT_DMAPI; - } else if (!strcmp(this_char, "ihashsize")) { - cmn_err(CE_WARN, - "XFS: ihashsize no longer used, option is deprecated."); - } else if (!strcmp(this_char, "osyncisdsync")) { - /* no-op, this is now the default */ - cmn_err(CE_WARN, - "XFS: osyncisdsync is now the default, option is deprecated."); - } else if (!strcmp(this_char, "irixsgid")) { - cmn_err(CE_WARN, - "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); - } else { - cmn_err(CE_WARN, - "XFS: unknown mount option [%s].", this_char); - return EINVAL; - } - } - - if (args->flags & XFSMNT_NORECOVERY) { - if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { - cmn_err(CE_WARN, - "XFS: no-recovery mounts must be read-only."); - return EINVAL; - } - } - - if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth options incompatible with the noalign option"); - return EINVAL; - } - - if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { - cmn_err(CE_WARN, - "XFS: cannot mount with both project and group quota"); - return EINVAL; - } - - if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { - printk("XFS: %s option needs the mount point option as well\n", - MNTOPT_DMAPI); - return EINVAL; - } - - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth must be specified together"); - return EINVAL; - } - - if (dsunit && (dswidth % dsunit != 0)) { - cmn_err(CE_WARN, - "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", - dswidth, dsunit); - return EINVAL; - } - - /* - * Applications using DMI filesystems often expect the - * inode generation number to be monotonically increasing. - * If we delete inode chunks we break this assumption, so - * keep unused inode chunks on disk for DMI filesystems - * until we come up with a better solution. 
- * Note that if "ikeep" or "noikeep" mount options are - * supplied, then they are honored. - */ - if (!(args->flags & XFSMNT_DMAPI) && !ikeep) - args->flags |= XFSMNT_IDELETE; - - if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { - if (dsunit) { - args->sunit = dsunit; - args->flags |= XFSMNT_RETERR; - } else { - args->sunit = vol_dsunit; - } - dswidth ? (args->swidth = dswidth) : - (args->swidth = vol_dswidth); - } else { - args->sunit = args->swidth = 0; - } - -done: - if (args->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - if (args->flags2) - args->flags |= XFSMNT_FLAGS2; - return 0; -} - -int -xfs_showargs( - struct xfs_mount *mp, - struct seq_file *m) -{ - static struct proc_xfs_info { - int flag; - char *str; - } xfs_info[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, - { XFS_MOUNT_INO64, "," MNTOPT_INO64 }, - { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, - { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, - { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, - { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, - { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, - { 0, NULL } - }; - struct proc_xfs_info *xfs_infop; - - for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { - if (mp->m_flags & xfs_infop->flag) - seq_puts(m, xfs_infop->str); - } - - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) - seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", - (int)(1 << mp->m_writeio_log) >> 10); - - if (mp->m_logbufs > 0) - seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); - if (mp->m_logbsize > 0) - seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); - - if (mp->m_logname) - seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); - if (mp->m_rtname) - seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); - - if (mp->m_dalign > 0) - seq_printf(m, "," MNTOPT_SUNIT "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); - if (mp->m_swidth > 0) - seq_printf(m, "," MNTOPT_SWIDTH "=%d", - 
(int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - - if (!(mp->m_flags & XFS_MOUNT_IDELETE)) - seq_printf(m, "," MNTOPT_IKEEP); - if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) - seq_printf(m, "," MNTOPT_LARGEIO); - - if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS)) - seq_printf(m, "," MNTOPT_64BITINODE); - if (mp->m_flags & XFS_MOUNT_GRPID) - seq_printf(m, "," MNTOPT_GRPID); - - if (mp->m_qflags & XFS_UQUOTA_ACCT) { - if (mp->m_qflags & XFS_UQUOTA_ENFD) - seq_puts(m, "," MNTOPT_USRQUOTA); - else - seq_puts(m, "," MNTOPT_UQUOTANOENF); - } - - if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_PRJQUOTA); - else - seq_puts(m, "," MNTOPT_PQUOTANOENF); - } - - if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_GRPQUOTA); - else - seq_puts(m, "," MNTOPT_GQUOTANOENF); - } - - if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) - seq_puts(m, "," MNTOPT_NOQUOTA); - - if (mp->m_flags & XFS_MOUNT_DMAPI) - seq_puts(m, "," MNTOPT_DMAPI); - return 0; -} - -/* - * Second stage of a freeze. The data is already frozen so we only - * need to take care of themetadata. Once that's done write a dummy - * record to dirty the log in case of a crash while frozen. 
- */ -void -xfs_freeze( - xfs_mount_t *mp) -{ - xfs_attr_quiesce(mp); - xfs_fs_log_dummy(mp); -} diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index a592fe02a339..1688817c55ed 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -13,16 +13,9 @@ int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args, int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp); int xfs_mntupdate(struct xfs_mount *mp, int *flags, struct xfs_mount_args *args); -int xfs_root(struct xfs_mount *mp, bhv_vnode_t **vpp); -int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp, - bhv_vnode_t *vp); int xfs_sync(struct xfs_mount *mp, int flags); -int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid); -int xfs_parseargs(struct xfs_mount *mp, char *options, - struct xfs_mount_args *args, int update); -int xfs_showargs(struct xfs_mount *mp, struct seq_file *m); -void xfs_freeze(struct xfs_mount *mp); void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); +void xfs_attr_quiesce(struct xfs_mount *mp); #endif /* _XFS_VFSOPS_H */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index efd5aff9eaf6..51305242ff8c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -88,7 +88,7 @@ xfs_getattr( bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -136,7 +136,7 @@ xfs_getattr( default: vap->va_rdev = 0; - if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (!(XFS_IS_REALTIME_INODE(ip))) { vap->va_blocksize = xfs_preferred_iosize(mp); } else { @@ -228,7 +228,7 @@ xfs_setattr( int file_owner; int need_iolock = 1; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); @@ -508,7 +508,7 @@ xfs_setattr( */ if ((ip->i_d.di_nextents || ip->i_delayed_blks) 
&& (mask & XFS_AT_XFLAGS) && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != + (XFS_IS_REALTIME_INODE(ip)) != (vap->va_xflags & XFS_XFLAG_REALTIME)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; @@ -520,7 +520,7 @@ xfs_setattr( if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { xfs_extlen_t size; - if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || + if (XFS_IS_REALTIME_INODE(ip) || ((mask & XFS_AT_XFLAGS) && (vap->va_xflags & XFS_XFLAG_REALTIME))) { size = mp->m_sb.sb_rextsize << @@ -804,12 +804,8 @@ xfs_setattr( if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) di_flags |= XFS_DIFLAG_EXTSZINHERIT; } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (vap->va_xflags & XFS_XFLAG_REALTIME) { + if (vap->va_xflags & XFS_XFLAG_REALTIME) di_flags |= XFS_DIFLAG_REALTIME; - ip->i_iocore.io_flags |= XFS_IOCORE_RT; - } else { - ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; - } if (vap->va_xflags & XFS_XFLAG_EXTSIZE) di_flags |= XFS_DIFLAG_EXTSIZE; } @@ -902,28 +898,6 @@ xfs_setattr( return code; } - -/* - * xfs_access - * Null conversion from vnode mode bits to inode mode bits, as in efs. - */ -int -xfs_access( - xfs_inode_t *ip, - int mode, - cred_t *credp) -{ - int error; - - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_iaccess(ip, mode, credp); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; -} - - /* * The maximum pathlen is 1024 bytes. 
Since the minimum file system * blocksize is 512 bytes, we can get a max of 2 extents back from @@ -987,7 +961,7 @@ xfs_readlink( int pathlen; int error = 0; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -1033,7 +1007,7 @@ xfs_fsync( int error; int log_flushed = 0, changed = 1; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); ASSERT(start >= 0 && stop >= -1); @@ -1149,7 +1123,7 @@ xfs_fsync( * If this inode is on the RT dev we need to flush that * cache as well. */ - if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) + if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); } @@ -1188,7 +1162,7 @@ xfs_free_eofblocks( nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1562,9 +1536,6 @@ xfs_release( error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); if (error) return error; - /* Update linux inode block count after free above */ - vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, - ip->i_d.di_nblocks + ip->i_delayed_blks); } } @@ -1592,7 +1563,7 @@ xfs_inactive( int error; int truncate; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); /* * If the inode is already free, then there can be nothing @@ -1638,9 +1609,6 @@ xfs_inactive( error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); if (error) return VN_INACTIVE_CACHE; - /* Update linux inode block count after free above */ - vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, - ip->i_d.di_nblocks + ip->i_delayed_blks); } goto out; } @@ -1805,7 +1773,7 @@ xfs_lookup( int error; uint lock_mode; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); @@ 
-1814,7 +1782,7 @@ xfs_lookup( error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); if (!error) { *vpp = XFS_ITOV(ip); - ITRACE(ip); + xfs_itrace_ref(ip); } xfs_iunlock_map_shared(dp, lock_mode); return error; @@ -1848,7 +1816,7 @@ xfs_create( int namelen; ASSERT(!*vpp); - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); namelen = VNAMELEN(dentry); @@ -1930,7 +1898,7 @@ xfs_create( goto error_return; goto abort_return; } - ITRACE(ip); + xfs_itrace_ref(ip); /* * At this point, we've gotten a newly allocated inode. @@ -2098,7 +2066,7 @@ again: e_inum = ip->i_ino; - ITRACE(ip); + xfs_itrace_ref(ip); /* * We want to lock in increasing inum. Since we've already @@ -2321,7 +2289,7 @@ xfs_remove( uint resblks; int namelen; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -2364,9 +2332,8 @@ xfs_remove( dm_di_mode = ip->i_d.di_mode; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - - ITRACE(ip); + xfs_itrace_entry(ip); + xfs_itrace_ref(ip); error = XFS_QM_DQATTACH(mp, dp, 0); if (!error && dp != ip) @@ -2498,8 +2465,7 @@ xfs_remove( if (link_zero && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); - vn_trace_exit(ip, __FUNCTION__, (inst_t *)__return_address); - + xfs_itrace_exit(ip); IRELE(ip); /* Fall through to std_return with error = 0 */ @@ -2562,8 +2528,8 @@ xfs_link( char *target_name = VNAME(dentry); int target_namelen; - vn_trace_entry(tdp, __FUNCTION__, (inst_t *)__return_address); - vn_trace_entry(xfs_vtoi(src_vp), __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(tdp); + xfs_itrace_entry(xfs_vtoi(src_vp)); target_namelen = VNAMELEN(dentry); ASSERT(!VN_ISDIR(src_vp)); @@ -2744,7 +2710,7 @@ xfs_mkdir( /* Return through std_return after this point. 
*/ - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); mp = dp->i_mount; udqp = gdqp = NULL; @@ -2810,7 +2776,7 @@ xfs_mkdir( goto error_return; goto abort_return; } - ITRACE(cdp); + xfs_itrace_ref(cdp); /* * Now we add the directory inode to the transaction. @@ -2936,7 +2902,7 @@ xfs_rmdir( int last_cdp_link; uint resblks; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3041,7 +3007,7 @@ xfs_rmdir( VN_HOLD(dir_vp); } - ITRACE(cdp); + xfs_itrace_ref(cdp); xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); ASSERT(cdp->i_d.di_nlink >= 2); @@ -3189,8 +3155,7 @@ xfs_symlink( ip = NULL; tp = NULL; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); - + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3317,7 +3282,7 @@ xfs_symlink( goto error_return; goto error1; } - ITRACE(ip); + xfs_itrace_ref(ip); /* * An error after we've joined dp to the transaction will result in the @@ -3465,27 +3430,6 @@ std_return: goto std_return; } - -int -xfs_fid2( - xfs_inode_t *ip, - xfs_fid_t *xfid) -{ - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - - xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len); - xfid->fid_pad = 0; - /* - * use memcpy because the inode is a long long and there's no - * assurance that xfid->fid_ino is properly aligned. 
- */ - memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); - xfid->fid_gen = ip->i_d.di_gen; - - return 0; -} - - int xfs_rwlock( xfs_inode_t *ip, @@ -3558,11 +3502,11 @@ xfs_inode_flush( if (iip && iip->ili_last_lsn) { xlog_t *log = mp->m_log; xfs_lsn_t sync_lsn; - int s, log_flags = XFS_LOG_FORCE; + int log_flags = XFS_LOG_FORCE; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); sync_lsn = log->l_last_sync_lsn; - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { if (flags & FLUSH_SYNC) @@ -3637,8 +3581,8 @@ xfs_set_dmattrs( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; - ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; + ip->i_d.di_dmevmask = evmask; + ip->i_d.di_dmstate = state; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); IHOLD(ip); @@ -3653,7 +3597,7 @@ xfs_reclaim( { bhv_vnode_t *vp = XFS_ITOV(ip); - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); ASSERT(!VN_MAPPED(vp)); @@ -3871,7 +3815,7 @@ xfs_alloc_file_space( int committed; int error; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3976,7 +3920,7 @@ retry: * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, + error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list, NULL); @@ -4052,13 +3996,13 @@ xfs_zero_remaining_bytes( int error = 0; bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, - ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? + XFS_IS_REALTIME_INODE(ip) ? 
mp->m_rtdev_targp : mp->m_ddev_targp); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; @@ -4141,7 +4085,7 @@ xfs_free_file_space( vp = XFS_ITOV(ip); mp = ip->i_mount; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4149,7 +4093,7 @@ xfs_free_file_space( error = 0; if (len <= 0) /* if nothing being freed */ return error; - rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); + rt = XFS_IS_REALTIME_INODE(ip); startoffset_fsb = XFS_B_TO_FSB(mp, offset); end_dmi_offset = offset + len; endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); @@ -4172,15 +4116,12 @@ xfs_free_file_space( vn_iowait(ip); /* wait for the completion of any pending DIOs */ } - rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); if (VN_CACHED(vp) != 0) { - xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, - ctooff(offtoct(ioffset)), -1); - error = xfs_flushinval_pages(ip, - ctooff(offtoct(ioffset)), - -1, FI_REMAPF_LOCKED); + xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); + error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_iolock; } @@ -4193,7 +4134,7 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; @@ -4208,7 +4149,7 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + error = 
xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; @@ -4284,7 +4225,7 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, + error = xfs_bunmapi(tp, ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { @@ -4347,23 +4288,11 @@ xfs_change_file_space( xfs_trans_t *tp; bhv_vattr_t va; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); - /* - * must be a regular file and have write permission - */ if (!S_ISREG(ip->i_d.di_mode)) return XFS_ERROR(EINVAL); - xfs_ilock(ip, XFS_ILOCK_SHARED); - - if ((error = xfs_iaccess(ip, S_IWUSR, credp))) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; - } - - xfs_iunlock(ip, XFS_ILOCK_SHARED); - switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index b7e461c40cfb..4e3970f0e5e3 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,7 +18,6 @@ int xfs_open(struct xfs_inode *ip); int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); -int xfs_access(struct xfs_inode *ip, int mode, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, xfs_off_t stop); @@ -39,7 +38,6 @@ int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, char *target_path, mode_t mode, bhv_vnode_t **vpp, struct cred *credp); -int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid); int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); int xfs_inode_flush(struct xfs_inode *ip, int 
flags); |