diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-04-30 09:56:41 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-04-30 09:56:44 +0200 |
commit | 3ca50496c2677a2b3fdd3ede86660fd1433beac6 (patch) | |
tree | 97a76d8479a8d8a96e04ed0694b8dbf89457bfcc /fs | |
parent | Merge branch 'perf' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/lin... (diff) | |
parent | Linux 2.6.34-rc6 (diff) | |
download | linux-3ca50496c2677a2b3fdd3ede86660fd1433beac6.tar.xz linux-3ca50496c2677a2b3fdd3ede86660fd1433beac6.zip |
Merge commit 'v2.6.34-rc6' into perf/core
Merge reason: update to the latest -rc.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
45 files changed, 370 insertions, 144 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 5c5bc8480070..f8b86e92cd66 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return ERR_PTR(-ENOMEM); } + rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); + if (rc) { + __putname(v9ses->aname); + __putname(v9ses->uname); + return ERR_PTR(rc); + } + spin_lock(&v9fs_sessionlist_lock); list_add(&v9ses->slist, &v9fs_sessionlist); spin_unlock(&v9fs_sessionlist_lock); @@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return fid; error: + bdi_destroy(&v9ses->bdi); return ERR_PTR(retval); } @@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) __putname(v9ses->uname); __putname(v9ses->aname); + bdi_destroy(&v9ses->bdi); + spin_lock(&v9fs_sessionlist_lock); list_del(&v9ses->slist); spin_unlock(&v9fs_sessionlist_lock); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index a0a8d3dd1361..bec4d0bcb458 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -20,6 +20,7 @@ * Boston, MA 02111-1301 USA * */ +#include <linux/backing-dev.h> /** * enum p9_session_flags - option flags for each 9P session @@ -102,6 +103,7 @@ struct v9fs_session_info { u32 uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ + struct backing_dev_info bdi; }; struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 491108bd6e0d..806da5d3b3a0 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_blocksize = 1 << sb->s_blocksize_bits; sb->s_magic = V9FS_MAGIC; sb->s_op = &v9fs_super_ops; + sb->s_bdi = &v9ses->bdi; sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | MS_NOATIME; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c54dad4e6063..a10f2582844f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/fscache.h> +#include <linux/backing-dev.h> #include "afs.h" #include "afs_vl.h" @@ -313,6 +314,7 @@ struct afs_volume { unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ struct rw_semaphore server_sem; /* lock for accessing current server */ + struct backing_dev_info bdi; }; /* diff --git a/fs/afs/super.c b/fs/afs/super.c index 14f6431598ad..e932e5a3a0c1 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data) sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; sb->s_fs_info = as; + sb->s_bdi = &as->volume->bdi; /* allocate the root inode and dentry */ fid.vid = as->volume->vid; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index a353e69e2391..401eeb21869f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) volume->cell = params->cell; volume->vid = vlocation->vldb.vid[params->type]; + ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY); + if (ret) + goto error_bdi; + init_rwsem(&volume->server_sem); /* look up all the applicable server records */ @@ -151,6 +155,8 @@ error: return ERR_PTR(ret); error_discard: + bdi_destroy(&volume->bdi); +error_bdi: up_write(¶ms->cell->vl_sem); for (loop = volume->nservers - 1; loop >= 0; loop--) @@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume) for (loop = volume->nservers - 1; loop >= 0; loop--) afs_put_server(volume->servers[loop]); + bdi_destroy(&volume->bdi); kfree(volume); _leave(" [destroyed]"); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 7ab23e006e4c..2c5f9a0e5d72 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( } } else if (!mm->start_data) { mm->start_data = seg->addr; -#ifndef CONFIG_MMU mm->end_data = seg->addr + phdr->p_memsz; -#endif } - -#ifdef CONFIG_MMU - if (seg->addr + phdr->p_memsz > mm->end_data) - mm->end_data = seg->addr + phdr->p_memsz; -#endif } seg++; diff --git a/fs/block_dev.c b/fs/block_dev.c index 2a6d0193f139..6dcee88c2e5d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -406,16 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) { - struct block_device *bdev = I_BDEV(filp->f_mapping->host); + struct inode *bd_inode = filp->f_mapping->host; + struct block_device *bdev = I_BDEV(bd_inode); int error; - error = sync_blockdev(bdev); - if (error) - return error; - + /* + * There is no need to serialise calls to blkdev_issue_flush with + * i_mutex and doing so causes performance issues with concurrent + * O_SYNC writers to a block device. + */ + mutex_unlock(&bd_inode->i_mutex); + error = blkdev_issue_flush(bdev, NULL); if (error == -EOPNOTSUPP) error = 0; + + mutex_lock(&bd_inode->i_mutex); + return error; } EXPORT_SYMBOL(blkdev_fsync); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e7b8f2c89ccb..feca04197d02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); -static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); - /* * end_io_wq structs are used to do processing in task context when an IO is * complete. This is used during reads to verify checksums, and it is used @@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { int err; - bdi->name = "btrfs"; bdi->capabilities = BDI_CAP_MAP_COPY; - err = bdi_init(bdi); + err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); if (err) return err; - err = bdi_register(bdi, NULL, "btrfs-%d", - atomic_inc_return(&btrfs_bdi_num)); - if (err) { - bdi_destroy(bdi); - return err; - } - bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->unplug_io_fn = btrfs_unplug_io_fn; bdi->unplug_io_data = info; diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4797787c6a44..246a167cb913 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -18,6 +18,8 @@ #ifndef _CIFS_FS_SB_H #define _CIFS_FS_SB_H +#include <linux/backing-dev.h> + #define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ #define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */ #define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ @@ -50,5 +52,6 @@ struct cifs_sb_info { #ifdef CONFIG_CIFS_DFS_UPCALL char *mountdata; /* mount options received at mount time */ #endif + struct backing_dev_info bdi; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ded66be6597c..ad235d604a0b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data, if (cifs_sb == NULL) return -ENOMEM; + rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); + if (rc) { + kfree(cifs_sb); + return rc; + } + #ifdef CONFIG_CIFS_DFS_UPCALL /* copy mount params to sb for use in submounts */ /* BB: should we move this after the mount so we @@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data, int len = strlen(data); cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); if (cifs_sb->mountdata == NULL) { + bdi_destroy(&cifs_sb->bdi); kfree(sb->s_fs_info); sb->s_fs_info = NULL; return -ENOMEM; @@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data, sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; + sb->s_bdi = &cifs_sb->bdi; /* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) sb->s_blocksize = cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ @@ -183,6 +191,7 @@ out_mount_failed: } #endif unload_nls(cifs_sb->local_nls); + bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb); } return rc; @@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb) #endif unload_nls(cifs_sb->local_nls); + bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb); unlock_kernel(); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index a1695dcadd99..d97f9935a028 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return -EBUSY; } + error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); + if (error) + goto bdi_err; + vc->vc_sb = sb; sb->s_fs_info = vc; @@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = 12; sb->s_magic = CODA_SUPER_MAGIC; sb->s_op = &coda_super_operations; + sb->s_bdi = &vc->bdi; /* get root fid from Venus: this needs the root inode */ error = venus_rootfid(sb, &fid); @@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return 0; error: + bdi_destroy(&vc->bdi); + bdi_err: if (root) iput(root); if (vc) @@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) static void coda_put_super(struct super_block *sb) { + bdi_destroy(&coda_vcp(sb)->bdi); coda_vcp(sb)->vc_sb = NULL; sb->s_fs_info = NULL; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c32a1b6a856b..641640dc7ae5 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -102,7 +102,6 @@ #include <linux/nbd.h> #include <linux/random.h> #include <linux/filter.h> -#include <linux/pktcdvd.h> #include <linux/hiddev.h> @@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE) COMPATIBLE_IOCTL(PPGETPHASE) COMPATIBLE_IOCTL(PPGETFLAGS) COMPATIBLE_IOCTL(PPSETFLAGS) -/* pktcdvd */ -COMPATIBLE_IOCTL(PACKET_CTRL_CMD) /* Big A */ /* sparc only */ /* Big Q for sound/OSS */ diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index bc7115403f38..bfc2e0f78f00 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -35,6 +35,7 @@ #include <linux/scatterlist.h> #include <linux/hash.h> #include <linux/nsproxy.h> +#include <linux/backing-dev.h> /* Version verification for shared data structures w/ userspace */ #define ECRYPTFS_VERSION_MAJOR 0x00 @@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat { struct ecryptfs_sb_info { struct super_block *wsi_sb; struct ecryptfs_mount_crypt_stat mount_crypt_stat; + struct backing_dev_info bdi; }; /* file private data. */ diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index af1a8f01ebac..760983d0f25e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache; static int ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) { + struct ecryptfs_sb_info *esi; int rc = 0; /* Released in ecryptfs_put_super() */ ecryptfs_set_superblock_private(sb, kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL)); - if (!ecryptfs_superblock_to_private(sb)) { + esi = ecryptfs_superblock_to_private(sb); + if (!esi) { ecryptfs_printk(KERN_WARNING, "Out of memory\n"); rc = -ENOMEM; goto out; } + + rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); + if (rc) + goto out; + + sb->s_bdi = &esi->bdi; sb->s_op = &ecryptfs_sops; /* Released through deactivate_super(sb) from get_sb_nodev */ sb->s_root = d_alloc(NULL, &(const struct qstr) { diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 278743c7716a..0c0ae491d231 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -122,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb) lock_kernel(); ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); + bdi_destroy(&sb_info->bdi); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); ecryptfs_set_superblock_private(sb, NULL); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 8442e353309f..22721b2fd890 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -35,6 +35,7 @@ #include <linux/fs.h> #include <linux/time.h> +#include <linux/backing-dev.h> #include "common.h" /* FIXME: Remove once pnfs hits mainline @@ -84,6 +85,7 @@ struct exofs_sb_info { u32 s_next_generation; /* next gen # to use */ atomic_t s_curr_pending; /* number of pending commands */ uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ + struct backing_dev_info bdi; /* register our bdi with VFS */ struct pnfs_osd_data_map data_map; /* Default raid to use * FIXME: Needed ? diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 18e57ea1e5b4..03149b9a5178 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb) _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], sbi->layout.s_pid); + bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); sb->s_fs_info = NULL; } @@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; + ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); + if (ret) + goto free_bdi; + /* use mount options to fill superblock */ od = osduld_path_lookup(opts->dev_name); if (IS_ERR(od)) { @@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) } /* set up operation vectors */ + sb->s_bdi = &sbi->bdi; sb->s_fs_info = sbi; sb->s_op = &exofs_sops; sb->s_export_op = &exofs_export_ops; @@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) return 0; free_sbi: + bdi_destroy(&sbi->bdi); +free_bdi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", opts->dev_name, sbi->layout.s_pid, ret); exofs_free_sbi(sbi); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94c8ee81f5e1..236b834b4ca8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode, physical += offset; length = EXT4_SB(inode->i_sb)->s_inode_size - offset; flags |= FIEMAP_EXTENT_DATA_INLINE; + brelse(iloc.bh); } else { /* external block */ physical = EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5381802d6052..81d605412844 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } else { struct ext4_iloc iloc; - err = ext4_get_inode_loc(inode, &iloc); + err = __ext4_get_inode_loc(inode, &iloc, 0); if (err) return err; if (wbc->sync_mode == WB_SYNC_ALL) @@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) (unsigned long long)iloc.bh->b_blocknr); err = -EIO; } + brelse(iloc.bh); } return err; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bde9d0b170c2..b423a364dca3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) mb_debug(1, "gonna free %u blocks in group %u (0x%p):", entry->count, entry->group, entry); + if (test_opt(sb, DISCARD)) { + ext4_fsblk_t discard_block; + + discard_block = entry->start_blk + + ext4_group_first_block_no(sb, entry->group); + trace_ext4_discard_blocks(sb, + (unsigned long long)discard_block, + entry->count); + sb_issue_discard(sb, discard_block, entry->count); + } + err = ext4_mb_load_buddy(sb, entry->group, &e4b); /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); @@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) page_cache_release(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->group); - if (test_opt(sb, DISCARD)) { - ext4_fsblk_t discard_block; - - discard_block = entry->start_blk + - ext4_group_first_block_no(sb, entry->group); - trace_ext4_discard_blocks(sb, - (unsigned long long)discard_block, - entry->count); - sb_issue_discard(sb, discard_block, entry->count); - } kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); } diff --git a/fs/ioctl.c b/fs/ioctl.c index 6c751106c2e5..7faefb4da939 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) #ifdef CONFIG_BLOCK -#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) -#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); +static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) +{ + return (offset >> inode->i_blkbits); +} + +static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) +{ + return (blk << inode->i_blkbits); +} /** * __generic_block_fiemap - FIEMAP for block based inodes (no locking) - * @inode - the inode to map - * @arg - the pointer to userspace where we copy everything to - * @get_block - the fs's get_block function + * @inode: the inode to map + * @fieinfo: the fiemap info struct that will be passed back to userspace + * @start: where to start mapping in the inode + * @len: how much space to map + * @get_block: the fs's get_block function * * This does FIEMAP for block based inodes. Basically it will just loop * through get_block until we hit the number of extents we want to map, or we @@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) */ int __generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block) + struct fiemap_extent_info *fieinfo, loff_t start, + loff_t len, get_block_t *get_block) { - struct buffer_head tmp; - unsigned long long start_blk; - long long length = 0, map_len = 0; + struct buffer_head map_bh; + sector_t start_blk, last_blk; + loff_t isize = i_size_read(inode); u64 logical = 0, phys = 0, size = 0; u32 flags = FIEMAP_EXTENT_MERGED; - int ret = 0, past_eof = 0, whole_file = 0; + bool past_eof = false, whole_file = false; + int ret = 0; - if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) return ret; - start_blk = logical_to_blk(inode, start); - - length = (long long)min_t(u64, len, i_size_read(inode)); - if (length < len) - whole_file = 1; + /* + * Either the i_mutex or other appropriate locking needs to be held + * since we expect isize to not change at all through the duration of + * this call. + */ + if (len >= isize) { + whole_file = true; + len = isize; + } - map_len = length; + start_blk = logical_to_blk(inode, start); + last_blk = logical_to_blk(inode, start + len - 1); do { /* * we set b_size to the total size we want so it will map as * many contiguous blocks as possible at once */ - memset(&tmp, 0, sizeof(struct buffer_head)); - tmp.b_size = map_len; + memset(&map_bh, 0, sizeof(struct buffer_head)); + map_bh.b_size = len; - ret = get_block(inode, start_blk, &tmp, 0); + ret = get_block(inode, start_blk, &map_bh, 0); if (ret) break; /* HOLE */ - if (!buffer_mapped(&tmp)) { - length -= blk_to_logical(inode, 1); + if (!buffer_mapped(&map_bh)) { start_blk++; /* - * we want to handle the case where there is an + * We want to handle the case where there is an * allocated block at the front of the file, and then * nothing but holes up to the end of the file properly, * to make sure that extent at the front gets properly * marked with FIEMAP_EXTENT_LAST */ if (!past_eof && - blk_to_logical(inode, start_blk) >= - blk_to_logical(inode, 0)+i_size_read(inode)) + blk_to_logical(inode, start_blk) >= isize) past_eof = 1; /* - * first hole after going past the EOF, this is our + * First hole after going past the EOF, this is our * last extent */ if (past_eof && size) { @@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode, ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); - break; + } else if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + size = 0; } /* if we have holes up to/past EOF then we're done */ - if (length <= 0 || past_eof) + if (start_blk > last_blk || past_eof || ret) break; } else { /* - * we have gone over the length of what we wanted to + * We have gone over the length of what we wanted to * map, and it wasn't the entire file, so add the extent * we got last time and exit. * @@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode, * are good to go, just add the extent to the fieinfo * and break */ - if (length <= 0 && !whole_file) { + if (start_blk > last_blk && !whole_file) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); @@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode, } logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, tmp.b_blocknr); - size = tmp.b_size; + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; flags = FIEMAP_EXTENT_MERGED; - length -= tmp.b_size; start_blk += logical_to_blk(inode, size); /* @@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode, * soon as we find a hole that the last extent we found * is marked with FIEMAP_EXTENT_LAST */ - if (!past_eof && - logical+size >= - blk_to_logical(inode, 0)+i_size_read(inode)) - past_eof = 1; + if (!past_eof && logical + size >= isize) + past_eof = true; } cond_resched(); } while (1); - /* if ret is 1 then we just hit the end of the extent array */ + /* If ret is 1 then we just hit the end of the extent array */ if (ret == 1) ret = 0; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index cf98da1be23e..fa3385154023 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_blocksize_bits = 10; sb->s_magic = NCP_SUPER_MAGIC; sb->s_op = &ncp_sops; + sb->s_bdi = &server->bdi; server = NCP_SBP(sb); memset(server, 0, sizeof(*server)); + error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); + if (error) + goto out_bdi; + server->ncp_filp = ncp_filp; server->ncp_sock = sock; @@ -719,6 +724,8 @@ out_fput2: if (server->info_filp) fput(server->info_filp); out_fput: + bdi_destroy(&server->bdi); +out_bdi: /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: * * The previously used put_filp(ncp_filp); was bogous, since @@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb) kill_pid(server->m.wdog_pid, SIGTERM, 1); put_pid(server->m.wdog_pid); + bdi_destroy(&server->bdi); kfree(server->priv.data); kfree(server->auth.object_name); vfree(server->rxbuf); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a8766c4ef2e0..acc9c4943b84 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -966,6 +966,8 @@ out_error: static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; + target->rsize = source->rsize; + target->wsize = source->wsize; target->acregmin = source->acregmin; target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index be46f26c9a56..a7bb5c694aa3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -837,6 +837,8 @@ out_zap_parent: /* If we have submounts, don't unhash ! */ if (have_submounts(dentry)) goto out_valid; + if (dentry->d_flags & DCACHE_DISCONNECTED) + goto out_valid; shrink_dcache_parent(dentry); } d_drop(dentry); @@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *dir; int openflags, ret = 0; - if (!is_atomic_open(nd)) + if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; parent = dget_parent(dentry); dir = parent->d_inode; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 638067007c65..071fcedd517c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5218,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) + if (IS_ERR(task)) { status = PTR_ERR(task); + goto out; + } rpc_put_task(task); + return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e01637240eeb..b4148fc00f9f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (data->version == 4) { error = nfs4_try_mount(flags, dev_name, data, mnt); kfree(data->client_address); + kfree(data->nfs_server.export_path); goto out; } #endif /* CONFIG_NFS_V4 */ @@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) devname = nfs_path(path->mnt->mnt_devname, path->mnt->mnt_root, path->dentry, page, PAGE_SIZE); - if (devname == NULL) + if (IS_ERR(devname)) goto out_freepage; tmp = kstrdup(devname, GFP_KERNEL); if (tmp == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index de38d63aa920..3aea3ca98ab7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +{ + if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) + return 1; + if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, + NFS_INO_COMMIT, nfs_wait_bit_killable, + TASK_KILLABLE)) + return 1; + return 0; +} + +static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +{ + clear_bit(NFS_INO_COMMIT, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); +} + + static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; @@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head, task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (how & FLUSH_SYNC) - rpc_wait_for_completion_task(task); rpc_put_task(task); return 0; } @@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) BDI_RECLAIMABLE); nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata) next: nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(data->inode)); nfs_commitdata_release(calldata); } @@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = { static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); - int res; + int may_wait = how & FLUSH_SYNC; + int res = 0; + if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) + goto out; spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); @@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how) int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; - } + if (may_wait) + wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + } else + nfs_commit_clear_lock(NFS_I(inode)); +out: return res; } @@ -1444,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) BUG_ON(!PageLocked(page)); for (;;) { + wait_on_page_writeback(page); req = nfs_page_find_request(page); if (req == NULL) break; @@ -1478,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page) .range_start = range_start, .range_end = range_end, }; - struct nfs_page *req; - int need_commit; int ret; while(PagePrivate(page)) { + wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; } - req = nfs_find_and_lock_request(page); - if (!req) - break; - if (IS_ERR(req)) { - ret = PTR_ERR(req); + ret = sync_inode(inode, &wbc); + if (ret < 0) goto out_error; - } - need_commit = test_bit(PG_CLEAN, &req->wb_flags); - nfs_clear_page_tag_locked(req); - if (need_commit) { - ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (ret < 0) - goto out_error; - } } return 0; out_error: diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e1703175ee28..34ccf815ea8a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = p + (argp->pagelen>>2); + argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; } else { - argp->end = p + (PAGE_SIZE>>2); + argp->end = argp->p + (PAGE_SIZE>>2); argp->pagelen -= PAGE_SIZE; } memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); @@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = p + (argp->pagelen>>2); + argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; } else { - argp->end = p + (PAGE_SIZE>>2); + argp->end = argp->p + (PAGE_SIZE>>2); argp->pagelen -= PAGE_SIZE; } } diff --git a/fs/proc/base.c b/fs/proc/base.c index 7621db800a74..8418fcc0a6ab 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2909,7 +2909,7 @@ out_no_task: */ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), - DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index f8a6075abf50..07930449a958 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir, struct reiserfs_de_head *deh) { struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; - if (reiserfs_expose_privroot(dir->d_sb)) - return 0; return (dir == dir->d_parent && privroot->d_inode && deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 4f9586bb7631..e7cc00e636dc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, if (!err && new_size < i_size_read(dentry->d_inode)) { struct iattr newattrs = { .ia_ctime = current_fs_time(inode->i_sb), - .ia_size = buffer_size, + .ia_size = new_size, .ia_valid = ATTR_SIZE | ATTR_CTIME, }; @@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask) return generic_permission(inode, mask, NULL); } -/* This will catch lookups from the fs root to .reiserfs_priv */ -static int -xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) +static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (container_of(q1, struct dentry, d_name) == priv_root) - return -ENOENT; - if (q1->len == name->len && - !memcmp(q1->name, name->name, name->len)) - return 0; - return 1; + return -EPERM; } static const struct dentry_operations xattr_lookup_poison_ops = { - .d_compare = xattr_lookup_poison, + .d_revalidate = xattr_hide_revalidate, }; int reiserfs_lookup_privroot(struct super_block *s) @@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - if (!reiserfs_expose_privroot(s)) - s->s_root->d_op = &xattr_lookup_poison_ops; + dentry->d_op = &xattr_lookup_poison_ops; if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1c4c8f089970..dfa1d67f8fca 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb) if (server->conn_pid) kill_pid(server->conn_pid, SIGTERM, 1); + bdi_destroy(&server->bdi); kfree(server->ops); smb_unload_nls(server); sb->s_fs_info = NULL; @@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) if (!server) goto out_no_server; sb->s_fs_info = server; + + if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY)) + goto out_bdi; + + sb->s_bdi = &server->bdi; server->super_block = sb; server->mnt = NULL; @@ -624,6 +630,8 @@ out_no_smbiod: out_bad_option: kfree(mem); out_no_mem: + bdi_destroy(&server->bdi); +out_bdi: if (!server->mnt) printk(KERN_ERR "smb_fill_super: allocation failure\n"); sb->s_fs_info = NULL; diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 1cb0d81b164b..653c030eb840 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, u64 cur_index = index >> msblk->devblksize_log2; int bytes, compressed, b = 0, k = 0, page = 0, avail; - - bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1, - sizeof(*bh), GFP_KERNEL); + bh = kcalloc(((srclength + msblk->devblksize - 1) + >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3550aec2f655..48b6f4a385a6 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -275,7 +275,8 @@ allocate_root: err = squashfs_read_inode(root, root_inode); if (err) { - iget_failed(root); + make_bad_inode(root); + iput(root); goto failed_mount; } insert_inode_hash(root); @@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb) kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); + kfree(sbi->inode_lookup_table); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 15a03d0fb9f3..7a603874e483 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, goto release_mutex; } + length = stream->total_out; mutex_unlock(&msblk->read_data_mutex); - return stream->total_out; + return length; release_mutex: mutex_unlock(&msblk->read_data_mutex); diff --git a/fs/super.c b/fs/super.c index f35ac6022109..1527e6a0ee35 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,7 @@ #include <linux/kobject.h> #include <linux/mutex.h> #include <linux/file.h> +#include <linux/backing-dev.h> #include <asm/uaccess.h> #include "internal.h" @@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data) return -EMFILE; } s->s_dev = MKDEV(0, dev & MINORMASK); + s->s_bdi = &noop_backing_dev_info; return 0; } @@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (error < 0) goto out_free_secdata; BUG_ON(!mnt->mnt_sb); + WARN_ON(!mnt->mnt_sb->s_bdi); - error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); - if (error) - goto out_sb; + error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); + if (error) + goto out_sb; /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE diff --git a/fs/sync.c b/fs/sync.c index fc5c3d75cf3c..92b228176f7c 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -14,6 +14,7 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include <linux/backing-dev.h> #include "internal.h" #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ @@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) * This should be safe, as we require bdi backing to actually * write out data in the first place */ - if (!sb->s_bdi) + if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) return 0; if (sb->s_qcop && sb->s_qcop->quota_sync) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 52e06b487ced..29f1edca76de 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1209,6 +1209,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); xfs_dmops_put(mp); @@ -1622,6 +1623,8 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; + xfs_inode_shrinker_register(mp); + kfree(mtpt); return 0; @@ -1867,6 +1870,7 @@ init_xfs_fs(void) goto out_cleanup_procfs; vfs_initquota(); + xfs_inode_shrinker_init(); error = register_filesystem(&xfs_fs_type); if (error) @@ -1894,6 +1898,7 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); + xfs_inode_shrinker_destroy(); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index fd9698215759..a427c638d909 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -95,7 +95,8 @@ xfs_inode_ag_walk( struct xfs_perag *pag, int flags), int flags, int tag, - int exclusive) + int exclusive, + int *nr_to_scan) { uint32_t first_index; int last_error = 0; @@ -134,7 +135,7 @@ restart: if (error == EFSCORRUPTED) break; - } while (1); + } while ((*nr_to_scan)--); if (skipped) { delay(1); @@ -150,12 +151,15 @@ xfs_inode_ag_iterator( struct xfs_perag *pag, int flags), int flags, int tag, - int exclusive) + int exclusive, + int *nr_to_scan) { int error = 0; int last_error = 0; xfs_agnumber_t ag; + int nr; + nr = nr_to_scan ? *nr_to_scan : INT_MAX; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { struct xfs_perag *pag; @@ -165,14 +169,18 @@ xfs_inode_ag_iterator( continue; } error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, - exclusive); + exclusive, &nr); xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) break; } + if (nr <= 0) + break; } + if (nr_to_scan) + *nr_to_scan = nr; return XFS_ERROR(last_error); } @@ -291,7 +299,7 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG, 0); + XFS_ICI_NO_TAG, 0, NULL); if (error) return XFS_ERROR(error); @@ -310,7 +318,7 @@ xfs_sync_attr( ASSERT((flags & ~SYNC_WAIT) == 0); return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG, 0); + XFS_ICI_NO_TAG, 0, NULL); } STATIC int @@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag( radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable++; } /* @@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag( { radix_tree_tag_clear(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable--; } /* @@ -854,5 +864,93 @@ xfs_reclaim_inodes( int mode) { return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, - XFS_ICI_RECLAIM_TAG, 1); + XFS_ICI_RECLAIM_TAG, 1, NULL); +} + +/* + * Shrinker infrastructure. + * + * This is all far more complex than it needs to be. It adds a global list of + * mounts because the shrinkers can only call a global context. We need to make + * the shrinkers pass a context to avoid the need for global state. + */ +static LIST_HEAD(xfs_mount_list); +static struct rw_semaphore xfs_mount_list_lock; + +static int +xfs_reclaim_inode_shrink( + int nr_to_scan, + gfp_t gfp_mask) +{ + struct xfs_mount *mp; + struct xfs_perag *pag; + xfs_agnumber_t ag; + int reclaimable = 0; + + if (nr_to_scan) { + if (!(gfp_mask & __GFP_FS)) + return -1; + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, + XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); + if (nr_to_scan <= 0) + break; + } + up_read(&xfs_mount_list_lock); + } + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); + continue; + } + reclaimable += pag->pag_ici_reclaimable; + xfs_perag_put(pag); + } + } + up_read(&xfs_mount_list_lock); + return reclaimable; +} + +static struct shrinker xfs_inode_shrinker = { + .shrink = xfs_reclaim_inode_shrink, + .seeks = DEFAULT_SEEKS, +}; + +void __init +xfs_inode_shrinker_init(void) +{ + init_rwsem(&xfs_mount_list_lock); + register_shrinker(&xfs_inode_shrinker); +} + +void +xfs_inode_shrinker_destroy(void) +{ + ASSERT(list_empty(&xfs_mount_list)); + unregister_shrinker(&xfs_inode_shrinker); +} + +void +xfs_inode_shrinker_register( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_add_tail(&mp->m_mplist, &xfs_mount_list); + up_write(&xfs_mount_list_lock); +} + +void +xfs_inode_shrinker_unregister( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_del(&mp->m_mplist); + up_write(&xfs_mount_list_lock); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index d480c346cabb..cdcbaaca9880 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag, int write_lock); + int flags, int tag, int write_lock, int *nr_to_scan); + +void xfs_inode_shrinker_init(void); +void xfs_inode_shrinker_destroy(void); +void xfs_inode_shrinker_register(struct xfs_mount *mp); +void xfs_inode_shrinker_unregister(struct xfs_mount *mp); #endif diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 5d0ee8d492db..50bee07d6b0e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, + XFS_ICI_NO_TAG, 0, NULL); } /*------------------------------------------------------------------------*/ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index b1a5a1ff88ea..abb8222b88c9 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -223,6 +223,7 @@ typedef struct xfs_perag { int pag_ici_init; /* incore inode cache initialised */ rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ + int pag_ici_reclaimable; /* reclaimable inodes */ #endif int pagb_count; /* pagb slots in use */ xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index cd27c9d6c71f..5bba29a07812 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -177,16 +177,26 @@ xfs_swap_extents_check_format( XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) return EINVAL; - /* Check root block of temp in btree form to max in target */ + /* + * If we are in a btree format, check that the temp root block will fit + * in the target and that it has enough extents to be in btree format + * in the target. + * + * Note that we have to be careful to allow btree->extent conversions + * (a common defrag case) which will occur when the temp inode is in + * extent format... + */ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && - XFS_IFORK_BOFF(ip) && - tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) + ((XFS_IFORK_BOFF(ip) && + tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || + XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) return EINVAL; - /* Check root block of target in btree form to max in temp */ + /* Reciprocal target->temp btree format checks */ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && - XFS_IFORK_BOFF(tip) && - ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) + ((XFS_IFORK_BOFF(tip) && + ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || + XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) return EINVAL; return 0; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4fa0bc7b983e..9ff48a16a7ee 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -259,6 +259,7 @@ typedef struct xfs_mount { wait_queue_head_t m_wait_single_sync_task; __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ + struct list_head m_mplist; /* inode shrinker mount list */ } xfs_mount_t; /* |