diff options
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/acl.c | 14 | ||||
-rw-r--r-- | fs/gfs2/aops.c | 18 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 26 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 64 | ||||
-rw-r--r-- | fs/gfs2/dir.h | 2 | ||||
-rw-r--r-- | fs/gfs2/export.c | 3 | ||||
-rw-r--r-- | fs/gfs2/file.c | 38 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 2 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 7 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 80 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 92 | ||||
-rw-r--r-- | fs/gfs2/lock_dlm.c | 993 | ||||
-rw-r--r-- | fs/gfs2/log.c | 6 | ||||
-rw-r--r-- | fs/gfs2/main.c | 13 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 4 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 33 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 91 | ||||
-rw-r--r-- | fs/gfs2/recovery.c | 11 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 293 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 16 | ||||
-rw-r--r-- | fs/gfs2/super.c | 23 | ||||
-rw-r--r-- | fs/gfs2/sys.c | 33 | ||||
-rw-r--r-- | fs/gfs2/sys.h | 2 | ||||
-rw-r--r-- | fs/gfs2/trans.h | 6 | ||||
-rw-r--r-- | fs/gfs2/xattr.c | 48 |
25 files changed, 1517 insertions, 401 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 65978d7885c8..230eb0f005b6 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -38,8 +38,9 @@ static const char *gfs2_acl_name(int type) return NULL; } -static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) +struct posix_acl *gfs2_get_acl(struct inode *inode, int type) { + struct gfs2_inode *ip = GFS2_I(inode); struct posix_acl *acl; const char *name; char *data; @@ -67,11 +68,6 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) return acl; } -struct posix_acl *gfs2_get_acl(struct inode *inode, int type) -{ - return gfs2_acl_get(GFS2_I(inode), type); -} - static int gfs2_set_mode(struct inode *inode, umode_t mode) { int error = 0; @@ -125,7 +121,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) if (S_ISLNK(inode->i_mode)) return 0; - acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT); + acl = gfs2_get_acl(&dip->i_inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) return PTR_ERR(acl); if (!acl) { @@ -166,7 +162,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) unsigned int len; int error; - acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS); + acl = gfs2_get_acl(&ip->i_inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (!acl) @@ -216,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, if (type < 0) return type; - acl = gfs2_acl_get(GFS2_I(inode), type); + acl = gfs2_get_acl(inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 4858e1fed8b1..501e5cba09b3 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, unsigned int data_blocks = 0, ind_blocks = 0, rblocks; int alloc_required; int error = 0; - struct gfs2_alloc *al = NULL; + struct gfs2_qadata *qa = NULL; pgoff_t index = pos >> PAGE_CACHE_SHIFT; unsigned from = pos & (PAGE_CACHE_SIZE - 1); struct page *page; @@ -639,8 +639,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { - al = gfs2_alloc_get(ip); - if (!al) { + qa = gfs2_qadata_get(ip); + if (!qa) { error = -ENOMEM; goto out_unlock; } @@ -649,8 +649,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_alloc_put; - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (error) goto out_qunlock; } @@ -711,7 +710,7 @@ out_trans_fail: out_qunlock: gfs2_quota_unlock(ip); out_alloc_put: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); } out_unlock: if (&ip->i_inode == sdp->sd_rindex) { @@ -848,7 +847,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct buffer_head *dibh; - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_qadata *qa = ip->i_qadata; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; @@ -880,10 +879,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (al) { + if (ip->i_res) gfs2_inplace_release(ip); + if (qa) { gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); } if (inode == sdp->sd_rindex) { gfs2_glock_dq(&m_ip->i_gh); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 41d494d79709..14a704015970 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -133,7 +133,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) and write it out to disk */ unsigned int n = 1; - error = gfs2_alloc_block(ip, &block, &n); + error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); if (error) goto out_brelse; if (isdir) { @@ -503,7 +503,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, do { int error; n = blks - alloced; - error = gfs2_alloc_block(ip, &bn, &n); + error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); if (error) return error; alloced += n; @@ -743,9 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, else if (ip->i_depth) revokes = sdp->sd_inptrs; - if (error) - return error; - memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); bstart = 0; blen = 0; @@ -1044,7 +1041,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; find_metapath(sdp, lblock, &mp, ip->i_height); - if (!gfs2_alloc_get(ip)) + if (!gfs2_qadata_get(ip)) return -ENOMEM; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); @@ -1064,7 +1061,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) gfs2_quota_unhold(ip); out: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); return error; } @@ -1166,21 +1163,20 @@ static int do_grow(struct inode *inode, u64 size) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; - struct gfs2_alloc *al = NULL; + struct gfs2_qadata *qa = NULL; int error; if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - al = gfs2_alloc_get(ip); - if (al == NULL) + qa = gfs2_qadata_get(ip); + if (qa == NULL) return -ENOMEM; error = gfs2_quota_lock_check(ip); if (error) goto do_grow_alloc_put; - al->al_requested = 1; - error = gfs2_inplace_reserve(ip); + error = gfs2_inplace_reserve(ip, 1); if (error) goto do_grow_qunlock; } @@ -1189,7 +1185,7 @@ static int do_grow(struct inode *inode, u64 size) if (error) goto do_grow_release; - if (al) { + if (qa) { error = gfs2_unstuff_dinode(ip, NULL); if (error) goto do_end_trans; @@ -1208,12 +1204,12 @@ static int do_grow(struct inode *inode, u64 size) do_end_trans: gfs2_trans_end(sdp); do_grow_release: - if (al) { + if (qa) { gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); do_grow_alloc_put: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); } return error; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8ccad2467cb6..c35573abd371 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -76,6 +76,8 @@ #define IS_LEAF 1 /* Hashed (leaf) directory */ #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ +#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */ + #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) @@ -821,7 +823,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct gfs2_dirent *dent; struct qstr name = { .name = "", .len = 0, .hash = 0 }; - error = gfs2_alloc_block(ip, &bn, &n); + error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); if (error) return NULL; bh = gfs2_meta_new(ip->i_gl, bn); @@ -1376,6 +1378,52 @@ out: return error; } +/** + * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. + * + * Note: we can't calculate each index like dir_e_read can because we don't + * have the leaf, and therefore we don't have the depth, and therefore we + * don't have the length. So we have to just read enough ahead to make up + * for the loss of information. + */ +static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, + struct file_ra_state *f_ra) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; + struct buffer_head *bh; + u64 blocknr = 0, last; + unsigned count; + + /* First check if we've already read-ahead for the whole range. */ + if (index + MAX_RA_BLOCKS < f_ra->start) + return; + + f_ra->start = max((pgoff_t)index, f_ra->start); + for (count = 0; count < MAX_RA_BLOCKS; count++) { + if (f_ra->start >= hsize) /* if exceeded the hash table */ + break; + + last = blocknr; + blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]); + f_ra->start++; + if (blocknr == last) + continue; + + bh = gfs2_getbuf(gl, blocknr, 1); + if (trylock_buffer(bh)) { + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + brelse(bh); + continue; + } + bh->b_end_io = end_buffer_read_sync; + submit_bh(READA | REQ_META, bh); + continue; + } + brelse(bh); + } +} /** * dir_e_read - Reads the entries from a directory into a filldir buffer @@ -1388,7 +1436,7 @@ out: */ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir) + filldir_t filldir, struct file_ra_state *f_ra) { struct gfs2_inode *dip = GFS2_I(inode); u32 hsize, len = 0; @@ -1402,10 +1450,14 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, hash = gfs2_dir_offset2hash(*offset); index = hash >> (32 - dip->i_depth); + if (dip->i_hash_cache == NULL) + f_ra->start = 0; lp = gfs2_dir_get_hash_table(dip); if (IS_ERR(lp)) return PTR_ERR(lp); + gfs2_dir_readahead(inode, hsize, index, f_ra); + while (index < hsize) { error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, &copied, &depth, @@ -1423,7 +1475,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, } int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir) + filldir_t filldir, struct file_ra_state *f_ra) { struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1437,7 +1489,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, return 0; if (dip->i_diskflags & GFS2_DIF_EXHASH) - return dir_e_read(inode, offset, opaque, filldir); + return dir_e_read(inode, offset, opaque, filldir, f_ra); if (!gfs2_is_stuffed(dip)) { gfs2_consist_inode(dip); @@ -1798,7 +1850,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (!ht) return -ENOMEM; - if (!gfs2_alloc_get(dip)) { + if (!gfs2_qadata_get(dip)) { error = -ENOMEM; goto out; } @@ -1887,7 +1939,7 @@ out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); out_put: - gfs2_alloc_put(dip); + gfs2_qadata_put(dip); out: kfree(ht); return error; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index ff5772fbf024..98c960beab35 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -25,7 +25,7 @@ extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, const struct gfs2_inode *ip); extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir); + filldir_t filldir, struct file_ra_state *f_ra); extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, const struct gfs2_inode *nip, unsigned int new_type); diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index fe9945f2ff72..70ba891654f8 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -99,6 +99,7 @@ static int gfs2_get_name(struct dentry *parent, char *name, struct gfs2_holder gh; u64 offset = 0; int error; + struct file_ra_state f_ra = { .start = 0 }; if (!dir) return -EINVAL; @@ -118,7 +119,7 @@ static int gfs2_get_name(struct dentry *parent, char *name, if (error) return error; - error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir); + error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra); gfs2_glock_dq_uninit(&gh); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ce36a56dfeac..c5fb3597f696 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -105,7 +105,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) return error; } - error = gfs2_dir_read(dir, &offset, dirent, filldir); + error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra); gfs2_glock_dq_uninit(&d_gh); @@ -223,7 +223,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) int error; u32 new_flags, flags; - error = mnt_want_write(filp->f_path.mnt); + error = mnt_want_write_file(filp); if (error) return error; @@ -285,7 +285,7 @@ out_trans_end: out: gfs2_glock_dq_uninit(&gh); out_drop_write: - mnt_drop_write(filp->f_path.mnt); + mnt_drop_write_file(filp); return error; } @@ -365,7 +365,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; - struct gfs2_alloc *al; + struct gfs2_qadata *qa; loff_t size; int ret; @@ -393,16 +393,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) } ret = -ENOMEM; - al = gfs2_alloc_get(ip); - if (al == NULL) + qa = gfs2_qadata_get(ip); + if (qa == NULL) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) goto out_alloc_put; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - ret = gfs2_inplace_reserve(ip); + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (ret) goto out_quota_unlock; @@ -448,7 +447,7 @@ out_trans_fail: out_quota_unlock: gfs2_quota_unlock(ip); out_alloc_put: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&gh); out: @@ -609,7 +608,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, struct inode *inode = mapping->host; int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); struct gfs2_inode *ip = GFS2_I(inode); - int ret, ret1 = 0; + int ret = 0, ret1 = 0; if (mapping->nrpages) { ret1 = filemap_fdatawrite_range(mapping, start, end); @@ -750,8 +749,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, struct gfs2_inode *ip = GFS2_I(inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; - struct gfs2_alloc *al; + struct gfs2_qadata *qa; int error; + const loff_t pos = offset; + const loff_t count = len; loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; @@ -782,8 +783,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, while (len > 0) { if (len < bytes) bytes = len; - al = gfs2_alloc_get(ip); - if (!al) { + qa = gfs2_qadata_get(ip); + if (!qa) { error = -ENOMEM; goto out_unlock; } @@ -795,8 +796,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; @@ -810,7 +810,6 @@ retry: max_bytes = bytes; calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, &max_bytes, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + RES_RG_HDR + gfs2_rg_blocks(ip); @@ -832,8 +831,11 @@ retry: offset += max_bytes; gfs2_inplace_release(ip); gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); } + + if (error == 0) + error = generic_write_sync(file, pos, count); goto out_unlock; out_trans_fail: @@ -841,7 +843,7 @@ out_trans_fail: out_qunlock: gfs2_quota_unlock(ip); out_alloc_put: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&ip->i_gh); out_uninit: diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 88e8a23d0026..376816fcd040 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) spin_lock(&gl->gl_spin); gl->gl_reply = ret; - if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); spin_unlock(&gl->gl_spin); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2553b858a72e..307ac31df781 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -121,8 +121,11 @@ enum { struct lm_lockops { const char *lm_proto_name; - int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); - void (*lm_unmount) (struct gfs2_sbd *sdp); + int (*lm_mount) (struct gfs2_sbd *sdp, const char *table); + void (*lm_first_done) (struct gfs2_sbd *sdp); + void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, + unsigned int result); + void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7389dfdcc9ef..97742a7ea9cc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -139,8 +139,45 @@ struct gfs2_bufdata { #define GDLM_STRNAME_BYTES 25 #define GDLM_LVB_SIZE 32 +/* + * ls_recover_flags: + * + * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been + * held by failed nodes whose journals need recovery. Those locks should + * only be used for journal recovery until the journal recovery is done. + * This is set by the dlm recover_prep callback and cleared by the + * gfs2_control thread when journal recovery is complete. To avoid + * races between recover_prep setting and gfs2_control clearing, recover_spin + * is held while changing this bit and reading/writing recover_block + * and recover_start. + * + * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used. + * + * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing + * recovery of all journals before allowing other nodes to mount the fs. + * This is cleared when FIRST_MOUNT_DONE is set. + * + * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished + * recovery of all journals, and now allows other nodes to mount the fs. + * + * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared + * BLOCK_LOCKS for the first time. The gfs2_control thread should now + * control clearing BLOCK_LOCKS for further recoveries. + * + * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq. + * + * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep() + * and recover_done(), i.e. set while recover_block == recover_start. + */ + enum { DFL_BLOCK_LOCKS = 0, + DFL_NO_DLM_OPS = 1, + DFL_FIRST_MOUNT = 2, + DFL_FIRST_MOUNT_DONE = 3, + DFL_MOUNT_DONE = 4, + DFL_UNMOUNT = 5, + DFL_DLM_RECOVERY = 6, }; struct lm_lockname { @@ -244,17 +281,16 @@ struct gfs2_glock { #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ -struct gfs2_alloc { +struct gfs2_qadata { /* quota allocation data */ /* Quota stuff */ - struct gfs2_quota_data *al_qd[2*MAXQUOTAS]; - struct gfs2_holder al_qd_ghs[2*MAXQUOTAS]; - unsigned int al_qd_num; - - u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - u32 al_alloced; /* Filled in by gfs2_alloc_*() */ + struct gfs2_quota_data *qa_qd[2*MAXQUOTAS]; + struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS]; + unsigned int qa_qd_num; +}; - /* Filled in by gfs2_inplace_reserve() */ - struct gfs2_holder al_rgd_gh; +struct gfs2_blkreserv { + u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ + struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */ }; enum { @@ -275,7 +311,8 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ - struct gfs2_alloc *i_alloc; + struct gfs2_qadata *i_qadata; /* quota allocation data */ + struct gfs2_blkreserv *i_res; /* resource group block reservation */ struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; @@ -392,6 +429,7 @@ struct gfs2_jdesc { #define JDF_RECOVERY 1 unsigned int jd_jid; unsigned int jd_blocks; + int jd_recover_error; }; struct gfs2_statfs_change_host { @@ -461,6 +499,7 @@ enum { SDF_NORECOVERY = 4, SDF_DEMOTE = 5, SDF_NOJOURNALID = 6, + SDF_RORECOVERY = 7, /* read only recovery */ }; #define GFS2_FSNAME_LEN 256 @@ -499,14 +538,26 @@ struct gfs2_sb_host { struct lm_lockstruct { int ls_jid; unsigned int ls_first; - unsigned int ls_first_done; unsigned int ls_nodir; const struct lm_lockops *ls_ops; - unsigned long ls_flags; dlm_lockspace_t *ls_dlm; - int ls_recover_jid_done; - int ls_recover_jid_status; + int ls_recover_jid_done; /* These two are deprecated, */ + int ls_recover_jid_status; /* used previously by gfs_controld */ + + struct dlm_lksb ls_mounted_lksb; /* mounted_lock */ + struct dlm_lksb ls_control_lksb; /* control_lock */ + char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ + struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ + + spinlock_t ls_recover_spin; /* protects following fields */ + unsigned long ls_recover_flags; /* DFL_ */ + uint32_t ls_recover_mount; /* gen in first recover_done cb */ + uint32_t ls_recover_start; /* gen in last recover_done cb */ + uint32_t ls_recover_block; /* copy recover_start in last recover_prep */ + uint32_t ls_recover_size; /* size of recover_submit, recover_result */ + uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */ + uint32_t *ls_recover_result; /* result of last jid recovery */ }; struct gfs2_sbd { @@ -544,6 +595,7 @@ struct gfs2_sbd { wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; + struct delayed_work sd_control_work; /* Inode Stuff */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index cfd4959b218c..a7d611b93f0f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -333,7 +333,7 @@ out: */ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, - unsigned int mode) + umode_t mode) { int error; @@ -364,7 +364,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, return 0; } -static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, +static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode, unsigned int *uid, unsigned int *gid) { if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && @@ -389,12 +389,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); int error; + int dblocks = 1; - if (gfs2_alloc_get(dip) == NULL) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + fs_warn(sdp, "rindex update returns %d\n", error); - dip->i_alloc->al_requested = RES_DINODE; - error = gfs2_inplace_reserve(dip); + error = gfs2_inplace_reserve(dip, RES_DINODE); if (error) goto out; @@ -402,14 +403,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) if (error) goto out_ipreserv; - error = gfs2_alloc_di(dip, no_addr, generation); + error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation); gfs2_trans_end(sdp); out_ipreserv: gfs2_inplace_release(dip); out: - gfs2_alloc_put(dip); return error; } @@ -447,7 +447,7 @@ static void gfs2_init_dir(struct buffer_head *dibh, */ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - const struct gfs2_inum_host *inum, unsigned int mode, + const struct gfs2_inum_host *inum, umode_t mode, unsigned int uid, unsigned int gid, const u64 *generation, dev_t dev, const char *symname, unsigned size, struct buffer_head **bhp) @@ -516,7 +516,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, } static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - unsigned int mode, const struct gfs2_inum_host *inum, + umode_t mode, const struct gfs2_inum_host *inum, const u64 *generation, dev_t dev, const char *symname, unsigned int size, struct buffer_head **bhp) { @@ -525,7 +525,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, int error; munge_mode_uid_gid(dip, &mode, &uid, &gid); - if (!gfs2_alloc_get(dip)) + if (!gfs2_qadata_get(dip)) return -ENOMEM; error = gfs2_quota_lock(dip, uid, gid); @@ -547,7 +547,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, out_quota: gfs2_quota_unlock(dip); out: - gfs2_alloc_put(dip); + gfs2_qadata_put(dip); return error; } @@ -555,13 +555,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_alloc *al; + struct gfs2_qadata *qa; int alloc_required; struct buffer_head *dibh; int error; - al = gfs2_alloc_get(dip); - if (!al) + qa = gfs2_qadata_get(dip); + if (!qa) return -ENOMEM; error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); @@ -576,9 +576,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, if (error) goto fail_quota_locks; - al->al_requested = sdp->sd_max_dirres; - - error = gfs2_inplace_reserve(dip); + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); if (error) goto fail_quota_locks; @@ -601,9 +599,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - inc_nlink(&ip->i_inode); - if (S_ISDIR(ip->i_inode.i_mode)) - inc_nlink(&ip->i_inode); + set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -619,11 +615,11 @@ fail_quota_locks: gfs2_quota_unlock(dip); fail: - gfs2_alloc_put(dip); + gfs2_qadata_put(dip); return error; } -int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, +static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { const struct xattr *xattr; @@ -659,7 +655,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, */ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, - unsigned int mode, dev_t dev, const char *symname, + umode_t mode, dev_t dev, const char *symname, unsigned int size, int excl) { const struct qstr *name = &dentry->d_name; @@ -728,9 +724,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, brelse(bh); gfs2_trans_end(sdp); - gfs2_inplace_release(dip); + /* Check if we reserved space in the rgrp. Function link_dinode may + not, depending on whether alloc is required. */ + if (dip->i_res) + gfs2_inplace_release(dip); gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); + gfs2_qadata_put(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); d_instantiate(dentry, inode); @@ -760,7 +759,7 @@ fail: */ static int gfs2_create(struct inode *dir, struct dentry *dentry, - int mode, struct nameidata *nd) + umode_t mode, struct nameidata *nd) { int excl = 0; if (nd && (nd->flags & LOOKUP_EXCL)) @@ -875,8 +874,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, error = 0; if (alloc_required) { - struct gfs2_alloc *al = gfs2_alloc_get(dip); - if (!al) { + struct gfs2_qadata *qa = gfs2_qadata_get(dip); + + if (!qa) { error = -ENOMEM; goto out_gunlock; } @@ -885,9 +885,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_alloc; - al->al_requested = sdp->sd_max_dirres; - - error = gfs2_inplace_reserve(dip); + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); if (error) goto out_gunlock_q; @@ -930,7 +928,7 @@ out_gunlock_q: gfs2_quota_unlock(dip); out_alloc: if (alloc_required) - gfs2_alloc_put(dip); + gfs2_qadata_put(dip); out_gunlock: gfs2_glock_dq(ghs + 1); out_child: @@ -1037,12 +1035,14 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct buffer_head *bh; struct gfs2_holder ghs[3]; struct gfs2_rgrpd *rgd; - int error; + int error = -EROFS; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); + if (!rgd) + goto out_inodes; gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); @@ -1088,12 +1088,13 @@ out_end_trans: out_gunlock: gfs2_glock_dq(ghs + 2); out_rgrp: - gfs2_holder_uninit(ghs + 2); gfs2_glock_dq(ghs + 1); out_child: - gfs2_holder_uninit(ghs + 1); gfs2_glock_dq(ghs); out_parent: + gfs2_holder_uninit(ghs + 2); +out_inodes: + gfs2_holder_uninit(ghs + 1); gfs2_holder_uninit(ghs); return error; } @@ -1129,7 +1130,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, * Returns: errno */ -static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0); } @@ -1143,7 +1144,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) * */ -static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, +static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); @@ -1350,8 +1351,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = 0; if (alloc_required) { - struct gfs2_alloc *al = gfs2_alloc_get(ndip); - if (!al) { + struct gfs2_qadata *qa = gfs2_qadata_get(ndip); + + if (!qa) { error = -ENOMEM; goto out_gunlock; } @@ -1360,9 +1362,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_alloc; - al->al_requested = sdp->sd_max_dirres; - - error = gfs2_inplace_reserve(ndip); + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); if (error) goto out_gunlock_q; @@ -1423,7 +1423,7 @@ out_gunlock_q: gfs2_quota_unlock(ndip); out_alloc: if (alloc_required) - gfs2_alloc_put(ndip); + gfs2_qadata_put(ndip); out_gunlock: while (x--) { gfs2_glock_dq(ghs + x); @@ -1584,7 +1584,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) ogid = ngid = NO_QUOTA_CHANGE; - if (!gfs2_alloc_get(ip)) + if (!gfs2_qadata_get(ip)) return -ENOMEM; error = gfs2_quota_lock(ip, nuid, ngid); @@ -1616,7 +1616,7 @@ out_end_trans: out_gunlock_q: gfs2_quota_unlock(ip); out_alloc: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); return error; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 98c80d8c2a62..8944d1e32ab5 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. + * Copyright 2004-2011 Red Hat, Inc. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -11,12 +11,15 @@ #include <linux/dlm.h> #include <linux/slab.h> #include <linux/types.h> +#include <linux/delay.h> #include <linux/gfs2_ondisk.h> #include "incore.h" #include "glock.h" #include "util.h" +#include "sys.h" +extern struct workqueue_struct *gfs2_control_wq; static void gdlm_ast(void *arg) { @@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl) dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); } -static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname) +/* + * dlm/gfs2 recovery coordination using dlm_recover callbacks + * + * 1. dlm_controld sees lockspace members change + * 2. dlm_controld blocks dlm-kernel locking activity + * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) + * 4. dlm_controld starts and finishes its own user level recovery + * 5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery + * 6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot) + * 7. dlm_recoverd does its own lock recovery + * 8. dlm_recoverd unblocks dlm-kernel locking activity + * 9. dlm_recoverd notifies gfs2 when done (recover_done with new generation) + * 10. gfs2_control updates control_lock lvb with new generation and jid bits + * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none) + * 12. gfs2_recover dequeues and recovers journals of failed nodes + * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result) + * 14. gfs2_control updates control_lock lvb jid bits for recovered journals + * 15. gfs2_control unblocks normal locking when all journals are recovered + * + * - failures during recovery + * + * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control + * clears BLOCK_LOCKS (step 15), e.g. another node fails while still + * recovering for a prior failure. gfs2_control needs a way to detect + * this so it can leave BLOCK_LOCKS set in step 15. This is managed using + * the recover_block and recover_start values. + * + * recover_done() provides a new lockspace generation number each time it + * is called (step 9). This generation number is saved as recover_start. + * When recover_prep() is called, it sets BLOCK_LOCKS and sets + * recover_block = recover_start. So, while recover_block is equal to + * recover_start, BLOCK_LOCKS should remain set. (recover_spin must + * be held around the BLOCK_LOCKS/recover_block/recover_start logic.) + * + * - more specific gfs2 steps in sequence above + * + * 3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start + * 6. recover_slot records any failed jids (maybe none) + * 9. recover_done sets recover_start = new generation number + * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids + * 12. gfs2_recover does journal recoveries for failed jids identified above + * 14. gfs2_control clears control_lock lvb bits for recovered jids + * 15. gfs2_control checks if recover_block == recover_start (step 3 occured + * again) then do nothing, otherwise if recover_start > recover_block + * then clear BLOCK_LOCKS. + * + * - parallel recovery steps across all nodes + * + * All nodes attempt to update the control_lock lvb with the new generation + * number and jid bits, but only the first to get the control_lock EX will + * do so; others will see that it's already done (lvb already contains new + * generation number.) + * + * . All nodes get the same recover_prep/recover_slot/recover_done callbacks + * . All nodes attempt to set control_lock lvb gen + bits for the new gen + * . One node gets control_lock first and writes the lvb, others see it's done + * . All nodes attempt to recover jids for which they see control_lock bits set + * . One node succeeds for a jid, and that one clears the jid bit in the lvb + * . All nodes will eventually see all lvb bits clear and unblock locks + * + * - is there a problem with clearing an lvb bit that should be set + * and missing a journal recovery? + * + * 1. jid fails + * 2. lvb bit set for step 1 + * 3. jid recovered for step 1 + * 4. jid taken again (new mount) + * 5. jid fails (for step 4) + * 6. lvb bit set for step 5 (will already be set) + * 7. lvb bit cleared for step 3 + * + * This is not a problem because the failure in step 5 does not + * require recovery, because the mount in step 4 could not have + * progressed far enough to unblock locks and access the fs. The + * control_mount() function waits for all recoveries to be complete + * for the latest lockspace generation before ever unblocking locks + * and returning. The mount in step 4 waits until the recovery in + * step 1 is done. + * + * - special case of first mounter: first node to mount the fs + * + * The first node to mount a gfs2 fs needs to check all the journals + * and recover any that need recovery before other nodes are allowed + * to mount the fs. (Others may begin mounting, but they must wait + * for the first mounter to be done before taking locks on the fs + * or accessing the fs.) This has two parts: + * + * 1. The mounted_lock tells a node it's the first to mount the fs. + * Each node holds the mounted_lock in PR while it's mounted. + * Each node tries to acquire the mounted_lock in EX when it mounts. + * If a node is granted the mounted_lock EX it means there are no + * other mounted nodes (no PR locks exist), and it is the first mounter. + * The mounted_lock is demoted to PR when first recovery is done, so + * others will fail to get an EX lock, but will get a PR lock. + * + * 2. The control_lock blocks others in control_mount() while the first + * mounter is doing first mount recovery of all journals. + * A mounting node needs to acquire control_lock in EX mode before + * it can proceed. The first mounter holds control_lock in EX while doing + * the first mount recovery, blocking mounts from other nodes, then demotes + * control_lock to NL when it's done (others_may_mount/first_done), + * allowing other nodes to continue mounting. + * + * first mounter: + * control_lock EX/NOQUEUE success + * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters) + * set first=1 + * do first mounter recovery + * mounted_lock EX->PR + * control_lock EX->NL, write lvb generation + * + * other mounter: + * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry) + * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR) + * mounted_lock PR/NOQUEUE success + * read lvb generation + * control_lock EX->NL + * set first=0 + * + * - mount during recovery + * + * If a node mounts while others are doing recovery (not first mounter), + * the mounting node will get its initial recover_done() callback without + * having seen any previous failures/callbacks. + * + * It must wait for all recoveries preceding its mount to be finished + * before it unblocks locks. It does this by repeating the "other mounter" + * steps above until the lvb generation number is >= its mount generation + * number (from initial recover_done) and all lvb bits are clear. + * + * - control_lock lvb format + * + * 4 bytes generation number: the latest dlm lockspace generation number + * from recover_done callback. Indicates the jid bitmap has been updated + * to reflect all slot failures through that generation. + * 4 bytes unused. + * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates + * that jid N needs recovery. + */ + +#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */ + +static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, + char *lvb_bits) +{ + uint32_t gen; + memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); + memcpy(&gen, lvb_bits, sizeof(uint32_t)); + *lvb_gen = le32_to_cpu(gen); +} + +static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, + char *lvb_bits) +{ + uint32_t gen; + memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); + gen = cpu_to_le32(lvb_gen); + memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); +} + +static int all_jid_bits_clear(char *lvb) +{ + int i; + for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { + if (lvb[i]) + return 0; + } + return 1; +} + +static void sync_wait_cb(void *arg) +{ + struct lm_lockstruct *ls = arg; + complete(&ls->ls_sync_wait); +} + +static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - if (fsname == NULL) { - fs_info(sdp, "no fsname found\n"); - return -EINVAL; + error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + if (error) { + fs_err(sdp, "%s lkid %x error %d\n", + name, lksb->sb_lkid, error); + return error; + } + + wait_for_completion(&ls->ls_sync_wait); + + if (lksb->sb_status != -DLM_EUNLOCK) { + fs_err(sdp, "%s lkid %x status %d\n", + name, lksb->sb_lkid, lksb->sb_status); + return -1; + } + return 0; +} + +static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, + unsigned int num, struct dlm_lksb *lksb, char *name) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char strname[GDLM_STRNAME_BYTES]; + int error, status; + + memset(strname, 0, GDLM_STRNAME_BYTES); + snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); + + error = dlm_lock(ls->ls_dlm, mode, lksb, flags, + strname, GDLM_STRNAME_BYTES - 1, + 0, sync_wait_cb, ls, NULL); + if (error) { + fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", + name, lksb->sb_lkid, flags, mode, error); + return error; + } + + wait_for_completion(&ls->ls_sync_wait); + + status = lksb->sb_status; + + if (status && status != -EAGAIN) { + fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n", + name, lksb->sb_lkid, flags, mode, status); + } + + return status; +} + +static int mounted_unlock(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock"); +} + +static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK, + &ls->ls_mounted_lksb, "mounted_lock"); +} + +static int control_unlock(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock"); +} + +static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK, + &ls->ls_control_lksb, "control_lock"); +} + +static void gfs2_control_func(struct work_struct *work) +{ + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t block_gen, start_gen, lvb_gen, flags; + int recover_set = 0; + int write_lvb = 0; + int recover_size; + int i, error; + + spin_lock(&ls->ls_recover_spin); + /* + * No MOUNT_DONE means we're still mounting; control_mount() + * will set this flag, after which this thread will take over + * all further clearing of BLOCK_LOCKS. + * + * FIRST_MOUNT means this node is doing first mounter recovery, + * for which recovery control is handled by + * control_mount()/control_first_done(), not this thread. + */ + if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + block_gen = ls->ls_recover_block; + start_gen = ls->ls_recover_start; + spin_unlock(&ls->ls_recover_spin); + + /* + * Equal block_gen and start_gen implies we are between + * recover_prep and recover_done callbacks, which means + * dlm recovery is in progress and dlm locking is blocked. + * There's no point trying to do any work until recover_done. + */ + + if (block_gen == start_gen) + return; + + /* + * Propagate recover_submit[] and recover_result[] to lvb: + * dlm_recoverd adds to recover_submit[] jids needing recovery + * gfs2_recover adds to recover_result[] journal recovery results + * + * set lvb bit for jids in recover_submit[] if the lvb has not + * yet been updated for the generation of the failure + * + * clear lvb bit for jids in recover_result[] if the result of + * the journal recovery is SUCCESS + */ + + error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK); + if (error) { + fs_err(sdp, "control lock EX error %d\n", error); + return; + } + + control_lvb_read(ls, &lvb_gen, lvb_bits); + + spin_lock(&ls->ls_recover_spin); + if (block_gen != ls->ls_recover_block || + start_gen != ls->ls_recover_start) { + fs_info(sdp, "recover generation %u block1 %u %u\n", + start_gen, block_gen, ls->ls_recover_block); + spin_unlock(&ls->ls_recover_spin); + control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + return; + } + + recover_size = ls->ls_recover_size; + + if (lvb_gen <= start_gen) { + /* + * Clear lvb bits for jids we've successfully recovered. + * Because all nodes attempt to recover failed journals, + * a journal can be recovered multiple times successfully + * in succession. Only the first will really do recovery, + * the others find it clean, but still report a successful + * recovery. So, another node may have already recovered + * the jid and cleared the lvb bit for it. + */ + for (i = 0; i < recover_size; i++) { + if (ls->ls_recover_result[i] != LM_RD_SUCCESS) + continue; + + ls->ls_recover_result[i] = 0; + + if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) + continue; + + __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + write_lvb = 1; + } + } + + if (lvb_gen == start_gen) { + /* + * Failed slots before start_gen are already set in lvb. + */ + for (i = 0; i < recover_size; i++) { + if (!ls->ls_recover_submit[i]) + continue; + if (ls->ls_recover_submit[i] < lvb_gen) + ls->ls_recover_submit[i] = 0; + } + } else if (lvb_gen < start_gen) { + /* + * Failed slots before start_gen are not yet set in lvb. + */ + for (i = 0; i < recover_size; i++) { + if (!ls->ls_recover_submit[i]) + continue; + if (ls->ls_recover_submit[i] < start_gen) { + ls->ls_recover_submit[i] = 0; + __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + } + } + /* even if there are no bits to set, we need to write the + latest generation to the lvb */ + write_lvb = 1; + } else { + /* + * we should be getting a recover_done() for lvb_gen soon + */ + } + spin_unlock(&ls->ls_recover_spin); + + if (write_lvb) { + control_lvb_write(ls, start_gen, lvb_bits); + flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; + } else { + flags = DLM_LKF_CONVERT; + } + + error = control_lock(sdp, DLM_LOCK_NL, flags); + if (error) { + fs_err(sdp, "control lock NL error %d\n", error); + return; + } + + /* + * Everyone will see jid bits set in the lvb, run gfs2_recover_set(), + * and clear a jid bit in the lvb if the recovery is a success. + * Eventually all journals will be recovered, all jid bits will + * be cleared in the lvb, and everyone will clear BLOCK_LOCKS. + */ + + for (i = 0; i < recover_size; i++) { + if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { + fs_info(sdp, "recover generation %u jid %d\n", + start_gen, i); + gfs2_recover_set(sdp, i); + recover_set++; + } + } + if (recover_set) + return; + + /* + * No more jid bits set in lvb, all recovery is done, unblock locks + * (unless a new recover_prep callback has occured blocking locks + * again while working above) + */ + + spin_lock(&ls->ls_recover_spin); + if (ls->ls_recover_block == block_gen && + ls->ls_recover_start == start_gen) { + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "recover generation %u done\n", start_gen); + gfs2_glock_thaw(sdp); + } else { + fs_info(sdp, "recover generation %u block2 %u %u\n", + start_gen, block_gen, ls->ls_recover_block); + spin_unlock(&ls->ls_recover_spin); + } +} + +static int control_mount(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t start_gen, block_gen, mount_gen, lvb_gen; + int mounted_mode; + int retries = 0; + int error; + + memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb)); + memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb)); + memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE); + ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb; + init_completion(&ls->ls_sync_wait); + + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK); + if (error) { + fs_err(sdp, "control_mount control_lock NL error %d\n", error); + return error; + } + + error = mounted_lock(sdp, DLM_LOCK_NL, 0); + if (error) { + fs_err(sdp, "control_mount mounted_lock NL error %d\n", error); + control_unlock(sdp); + return error; + } + mounted_mode = DLM_LOCK_NL; + +restart: + if (retries++ && signal_pending(current)) { + error = -EINTR; + goto fail; + } + + /* + * We always start with both locks in NL. control_lock is + * demoted to NL below so we don't need to do it here. + */ + + if (mounted_mode != DLM_LOCK_NL) { + error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + if (error) + goto fail; + mounted_mode = DLM_LOCK_NL; + } + + /* + * Other nodes need to do some work in dlm recovery and gfs2_control + * before the recover_done and control_lock will be ready for us below. + * A delay here is not required but often avoids having to retry. + */ + + msleep_interruptible(500); + + /* + * Acquire control_lock in EX and mounted_lock in either EX or PR. + * control_lock lvb keeps track of any pending journal recoveries. + * mounted_lock indicates if any other nodes have the fs mounted. + */ + + error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK); + if (error == -EAGAIN) { + goto restart; + } else if (error) { + fs_err(sdp, "control_mount control_lock EX error %d\n", error); + goto fail; + } + + error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); + if (!error) { + mounted_mode = DLM_LOCK_EX; + goto locks_done; + } else if (error != -EAGAIN) { + fs_err(sdp, "control_mount mounted_lock EX error %d\n", error); + goto fail; + } + + error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); + if (!error) { + mounted_mode = DLM_LOCK_PR; + goto locks_done; + } else { + /* not even -EAGAIN should happen here */ + fs_err(sdp, "control_mount mounted_lock PR error %d\n", error); + goto fail; + } + +locks_done: + /* + * If we got both locks above in EX, then we're the first mounter. + * If not, then we need to wait for the control_lock lvb to be + * updated by other mounted nodes to reflect our mount generation. + * + * In simple first mounter cases, first mounter will see zero lvb_gen, + * but in cases where all existing nodes leave/fail before mounting + * nodes finish control_mount, then all nodes will be mounting and + * lvb_gen will be non-zero. + */ + + control_lvb_read(ls, &lvb_gen, lvb_bits); + + if (lvb_gen == 0xFFFFFFFF) { + /* special value to force mount attempts to fail */ + fs_err(sdp, "control_mount control_lock disabled\n"); + error = -EINVAL; + goto fail; + } + + if (mounted_mode == DLM_LOCK_EX) { + /* first mounter, keep both EX while doing first recovery */ + spin_lock(&ls->ls_recover_spin); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); + set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "first mounter control generation %u\n", lvb_gen); + return 0; + } + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + if (error) + goto fail; + + /* + * We are not first mounter, now we need to wait for the control_lock + * lvb generation to be >= the generation from our first recover_done + * and all lvb bits to be clear (no pending journal recoveries.) + */ + + if (!all_jid_bits_clear(lvb_bits)) { + /* journals need recovery, wait until all are clear */ + fs_info(sdp, "control_mount wait for journal recovery\n"); + goto restart; + } + + spin_lock(&ls->ls_recover_spin); + block_gen = ls->ls_recover_block; + start_gen = ls->ls_recover_start; + mount_gen = ls->ls_recover_mount; + + if (lvb_gen < mount_gen) { + /* wait for mounted nodes to update control_lock lvb to our + generation, which might include new recovery bits set */ + fs_info(sdp, "control_mount wait1 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; + } + + if (lvb_gen != start_gen) { + /* wait for mounted nodes to update control_lock lvb to the + latest recovery generation */ + fs_info(sdp, "control_mount wait2 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; + } + + if (block_gen == start_gen) { + /* dlm recovery in progress, wait for it to finish */ + fs_info(sdp, "control_mount wait3 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; } - error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm, - DLM_LSFL_FS | DLM_LSFL_NEWEXCL | - (ls->ls_nodir ? DLM_LSFL_NODIR : 0), - GDLM_LVB_SIZE); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); + memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); + memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); + spin_unlock(&ls->ls_recover_spin); + return 0; + +fail: + mounted_unlock(sdp); + control_unlock(sdp); + return error; +} + +static int dlm_recovery_wait(void *word) +{ + schedule(); + return 0; +} + +static int control_first_done(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t start_gen, block_gen; + int error; + +restart: + spin_lock(&ls->ls_recover_spin); + start_gen = ls->ls_recover_start; + block_gen = ls->ls_recover_block; + + if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) || + !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + /* sanity check, should not happen */ + fs_err(sdp, "control_first_done start %u block %u flags %lx\n", + start_gen, block_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + control_unlock(sdp); + return -1; + } + + if (start_gen == block_gen) { + /* + * Wait for the end of a dlm recovery cycle to switch from + * first mounter recovery. We can ignore any recover_slot + * callbacks between the recover_prep and next recover_done + * because we are still the first mounter and any failed nodes + * have not fully mounted, so they don't need recovery. + */ + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "control_first_done wait gen %u\n", start_gen); + + wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, + dlm_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + + clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags); + memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); + memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); + spin_unlock(&ls->ls_recover_spin); + + memset(lvb_bits, 0, sizeof(lvb_bits)); + control_lvb_write(ls, start_gen, lvb_bits); + + error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); + if (error) + fs_err(sdp, "control_first_done mounted PR error %d\n", error); + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK); if (error) - printk(KERN_ERR "dlm_new_lockspace error %d", error); + fs_err(sdp, "control_first_done control NL error %d\n", error); return error; } +/* + * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC) + * to accomodate the largest slot number. (NB dlm slot numbers start at 1, + * gfs2 jids start at 0, so jid = slot - 1) + */ + +#define RECOVER_SIZE_INC 16 + +static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, + int num_slots) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + uint32_t *submit = NULL; + uint32_t *result = NULL; + uint32_t old_size, new_size; + int i, max_jid; + + max_jid = 0; + for (i = 0; i < num_slots; i++) { + if (max_jid < slots[i].slot - 1) + max_jid = slots[i].slot - 1; + } + + old_size = ls->ls_recover_size; + + if (old_size >= max_jid + 1) + return 0; + + new_size = old_size + RECOVER_SIZE_INC; + + submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + if (!submit || !result) { + kfree(submit); + kfree(result); + return -ENOMEM; + } + + spin_lock(&ls->ls_recover_spin); + memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t)); + memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t)); + kfree(ls->ls_recover_submit); + kfree(ls->ls_recover_result); + ls->ls_recover_submit = submit; + ls->ls_recover_result = result; + ls->ls_recover_size = new_size; + spin_unlock(&ls->ls_recover_spin); + return 0; +} + +static void free_recover_size(struct lm_lockstruct *ls) +{ + kfree(ls->ls_recover_submit); + kfree(ls->ls_recover_result); + ls->ls_recover_submit = NULL; + ls->ls_recover_result = NULL; + ls->ls_recover_size = 0; +} + +/* dlm calls before it does lock recovery */ + +static void gdlm_recover_prep(void *arg) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + spin_lock(&ls->ls_recover_spin); + ls->ls_recover_block = ls->ls_recover_start; + set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); + + if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); +} + +/* dlm calls after recover_prep has been completed on all lockspace members; + identifies slot/jid of failed member */ + +static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int jid = slot->slot - 1; + + spin_lock(&ls->ls_recover_spin); + if (ls->ls_recover_size < jid + 1) { + fs_err(sdp, "recover_slot jid %d gen %u short size %d", + jid, ls->ls_recover_block, ls->ls_recover_size); + spin_unlock(&ls->ls_recover_spin); + return; + } + + if (ls->ls_recover_submit[jid]) { + fs_info(sdp, "recover_slot jid %d gen %u prev %u", + jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); + } + ls->ls_recover_submit[jid] = ls->ls_recover_block; + spin_unlock(&ls->ls_recover_spin); +} + +/* dlm calls after recover_slot and after it completes lock recovery */ + +static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, + int our_slot, uint32_t generation) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + /* ensure the ls jid arrays are large enough */ + set_recover_size(sdp, slots, num_slots); + + spin_lock(&ls->ls_recover_spin); + ls->ls_recover_start = generation; + + if (!ls->ls_recover_mount) { + ls->ls_recover_mount = generation; + ls->ls_jid = our_slot - 1; + } + + if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); + + clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); + spin_unlock(&ls->ls_recover_spin); +} + +/* gfs2_recover thread has a journal recovery result */ + +static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, + unsigned int result) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + return; + + /* don't care about the recovery of own journal during mount */ + if (jid == ls->ls_jid) + return; + + spin_lock(&ls->ls_recover_spin); + if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + if (ls->ls_recover_size < jid + 1) { + fs_err(sdp, "recovery_result jid %d short size %d", + jid, ls->ls_recover_size); + spin_unlock(&ls->ls_recover_spin); + return; + } + + fs_info(sdp, "recover jid %d result %s\n", jid, + result == LM_RD_GAVEUP ? "busy" : "success"); + + ls->ls_recover_result[jid] = result; + + /* GAVEUP means another node is recovering the journal; delay our + next attempt to recover it, to give the other node a chance to + finish before trying again */ + + if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, + result == LM_RD_GAVEUP ? HZ : 0); + spin_unlock(&ls->ls_recover_spin); +} + +const struct dlm_lockspace_ops gdlm_lockspace_ops = { + .recover_prep = gdlm_recover_prep, + .recover_slot = gdlm_recover_slot, + .recover_done = gdlm_recover_done, +}; + +static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char cluster[GFS2_LOCKNAME_LEN]; + const char *fsname; + uint32_t flags; + int error, ops_result; + + /* + * initialize everything + */ + + INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func); + spin_lock_init(&ls->ls_recover_spin); + ls->ls_recover_flags = 0; + ls->ls_recover_mount = 0; + ls->ls_recover_start = 0; + ls->ls_recover_block = 0; + ls->ls_recover_size = 0; + ls->ls_recover_submit = NULL; + ls->ls_recover_result = NULL; + + error = set_recover_size(sdp, NULL, 0); + if (error) + goto fail; + + /* + * prepare dlm_new_lockspace args + */ + + fsname = strchr(table, ':'); + if (!fsname) { + fs_info(sdp, "no fsname found\n"); + error = -EINVAL; + goto fail_free; + } + memset(cluster, 0, sizeof(cluster)); + memcpy(cluster, table, strlen(table) - strlen(fsname)); + fsname++; + + flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL; + if (ls->ls_nodir) + flags |= DLM_LSFL_NODIR; + + /* + * create/join lockspace + */ + + error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE, + &gdlm_lockspace_ops, sdp, &ops_result, + &ls->ls_dlm); + if (error) { + fs_err(sdp, "dlm_new_lockspace error %d\n", error); + goto fail_free; + } + + if (ops_result < 0) { + /* + * dlm does not support ops callbacks, + * old dlm_controld/gfs_controld are used, try without ops. + */ + fs_info(sdp, "dlm lockspace ops not used\n"); + free_recover_size(ls); + set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags); + return 0; + } + + if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) { + fs_err(sdp, "dlm lockspace ops disallow jid preset\n"); + error = -EINVAL; + goto fail_release; + } + + /* + * control_mount() uses control_lock to determine first mounter, + * and for later mounts, waits for any recoveries to be cleared. + */ + + error = control_mount(sdp); + if (error) { + fs_err(sdp, "mount control error %d\n", error); + goto fail_release; + } + + ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); + return 0; + +fail_release: + dlm_release_lockspace(ls->ls_dlm, 2); +fail_free: + free_recover_size(ls); +fail: + return error; +} + +static void gdlm_first_done(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int error; + + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + return; + + error = control_first_done(sdp); + if (error) + fs_err(sdp, "mount first_done error %d\n", error); +} + static void gdlm_unmount(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + goto release; + + /* wait for gfs2_control_wq to be done with this mount */ + + spin_lock(&ls->ls_recover_spin); + set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + flush_delayed_work_sync(&sdp->sd_control_work); + + /* mounted_lock and control_lock will be purged in dlm recovery */ +release: if (ls->ls_dlm) { dlm_release_lockspace(ls->ls_dlm, 2); ls->ls_dlm = NULL; } + + free_recover_size(ls); } static const match_table_t dlm_tokens = { @@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = { const struct lm_lockops gfs2_dlm_ops = { .lm_proto_name = "lock_dlm", .lm_mount = gdlm_mount, + .lm_first_done = gdlm_first_done, + .lm_recovery_result = gdlm_recovery_result, .lm_unmount = gdlm_unmount, .lm_put_lock = gdlm_put_lock, .lm_lock = gdlm_lock, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 598646434362..756fae9eaf8f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -626,7 +626,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); else - submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); + submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) @@ -951,8 +951,8 @@ int gfs2_logd(void *data) wake_up(&sdp->sd_log_waitq); t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; - if (freezing(current)) - refrigerator(); + + try_to_freeze(); do { prepare_to_wait(&sdp->sd_logd_waitq, &wait, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 8a139ff1919f..a8d9bcd0e19c 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -28,6 +28,8 @@ #include "recovery.h" #include "dir.h" +struct workqueue_struct *gfs2_control_wq; + static struct shrinker qd_shrinker = { .shrink = gfs2_shrink_qd_memory, .seeks = DEFAULT_SEEKS, @@ -40,7 +42,8 @@ static void gfs2_init_inode_once(void *foo) inode_init_once(&ip->i_inode); init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); - ip->i_alloc = NULL; + ip->i_qadata = NULL; + ip->i_res = NULL; ip->i_hash_cache = NULL; } @@ -145,12 +148,19 @@ static int __init init_gfs2_fs(void) if (!gfs_recovery_wq) goto fail_wq; + gfs2_control_wq = alloc_workqueue("gfs2_control", + WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); + if (!gfs2_control_wq) + goto fail_control; + gfs2_register_debugfs(); printk("GFS2 installed\n"); return 0; +fail_control: + destroy_workqueue(gfs_recovery_wq); fail_wq: unregister_filesystem(&gfs2meta_fs_type); fail_unregister: @@ -194,6 +204,7 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); + destroy_workqueue(gfs2_control_wq); rcu_barrier(); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index be29858900f6..181586e673f9 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); + ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); dblock++; extlen--; @@ -444,7 +444,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(READA, 1, &bh); + ll_rw_block(READA | REQ_META, 1, &bh); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index cb23c2be731a..6aacf3f230a2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); + submit_bio(READ_SYNC | REQ_META, bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { @@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp) { char *message = "FIRSTMOUNT=Done"; char *envp[] = { message, NULL }; - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - ls->ls_first_done = 1; + + fs_info(sdp, "first mount done, others may mount\n"); + + if (sdp->sd_lockstruct.ls_ops->lm_first_done) + sdp->sd_lockstruct.ls_ops->lm_first_done(sdp); + kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } @@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) struct gfs2_args *args = &sdp->sd_args; const char *proto = sdp->sd_proto_name; const char *table = sdp->sd_table_name; - const char *fsname; char *o, *options; int ret; @@ -1004,21 +1007,12 @@ hostdata_error: } } - if (sdp->sd_args.ar_spectator) - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); - else - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, - sdp->sd_lockstruct.ls_jid); - - fsname = strchr(table, ':'); - if (fsname) - fsname++; if (lm->lm_mount == NULL) { fs_info(sdp, "Now mounting FS...\n"); complete_all(&sdp->sd_locking_init); return 0; } - ret = lm->lm_mount(sdp, fsname); + ret = lm->lm_mount(sdp, table); if (ret == 0) fs_info(sdp, "Joined cluster. Now mounting FS...\n"); complete_all(&sdp->sd_locking_init); @@ -1084,7 +1078,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; - set_bit(SDF_NORECOVERY, &sdp->sd_flags); + set_bit(SDF_RORECOVERY, &sdp->sd_flags); } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; @@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) goto fail; + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); + gfs2_create_debugfs_file(sdp); error = gfs2_sys_fs_add(sdp); @@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent goto fail_sb; } + if (sdp->sd_args.ar_spectator) + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", + sdp->sd_table_name); + else + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", + sdp->sd_table_name, sdp->sd_lockstruct.ls_jid); + error = init_inodes(sdp, DO); if (error) goto fail_sb; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 7e528dc14f85..a45b21b03915 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -494,11 +494,11 @@ static void qdsb_put(struct gfs2_quota_data *qd) int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = ip->i_alloc; - struct gfs2_quota_data **qd = al->al_qd; + struct gfs2_qadata *qa = ip->i_qadata; + struct gfs2_quota_data **qd = qa->qa_qd; int error; - if (gfs2_assert_warn(sdp, !al->al_qd_num) || + if (gfs2_assert_warn(sdp, !qa->qa_qd_num) || gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) return -EIO; @@ -508,20 +508,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); if (error) goto out; - al->al_qd_num++; + qa->qa_qd_num++; qd++; error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); if (error) goto out; - al->al_qd_num++; + qa->qa_qd_num++; qd++; if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { error = qdsb_get(sdp, QUOTA_USER, uid, qd); if (error) goto out; - al->al_qd_num++; + qa->qa_qd_num++; qd++; } @@ -529,7 +529,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); if (error) goto out; - al->al_qd_num++; + qa->qa_qd_num++; qd++; } @@ -542,16 +542,16 @@ out: void gfs2_quota_unhold(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_qadata *qa = ip->i_qadata; unsigned int x; gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); - for (x = 0; x < al->al_qd_num; x++) { - qdsb_put(al->al_qd[x]); - al->al_qd[x] = NULL; + for (x = 0; x < qa->qa_qd_num; x++) { + qdsb_put(qa->qa_qd[x]); + qa->qa_qd[x] = NULL; } - al->al_qd_num = 0; + qa->qa_qd_num = 0; } static int sort_qd(const void *a, const void *b) @@ -712,7 +712,7 @@ get_a_page: set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; @@ -762,7 +762,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) struct gfs2_quota_data *qd; loff_t offset; unsigned int nalloc = 0, blocks; - struct gfs2_alloc *al = NULL; int error; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), @@ -792,26 +791,19 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) nalloc++; } - al = gfs2_alloc_get(ip); - if (!al) { - error = -ENOMEM; - goto out_gunlock; - } /* * 1 blk for unstuffing inode if stuffed. We add this extra * block to the reservation unconditionally. If the inode * doesn't need unstuffing, the block will be released to the * rgrp since it won't be allocated during the transaction */ - al->al_requested = 1; /* +3 in the end for unstuffing block, inode size update block * and another block in case quota straddles page boundary and * two blocks need to be updated instead of 1 */ blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; - if (nalloc) - al->al_requested += nalloc * (data_blocks + ind_blocks); - error = gfs2_inplace_reserve(ip); + error = gfs2_inplace_reserve(ip, 1 + + (nalloc * (data_blocks + ind_blocks))); if (error) goto out_alloc; @@ -840,8 +832,6 @@ out_end_trans: out_ipres: gfs2_inplace_release(ip); out_alloc: - gfs2_alloc_put(ip); -out_gunlock: gfs2_glock_dq_uninit(&i_gh); out: while (qx--) @@ -925,7 +915,7 @@ fail: int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; unsigned int x; int error = 0; @@ -938,15 +928,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *), + sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - for (x = 0; x < al->al_qd_num; x++) { + for (x = 0; x < qa->qa_qd_num; x++) { int force = NO_FORCE; - qd = al->al_qd[x]; + qd = qa->qa_qd[x]; if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) force = FORCE; - error = do_glock(qd, force, &al->al_qd_ghs[x]); + error = do_glock(qd, force, &qa->qa_qd_ghs[x]); if (error) break; } @@ -955,7 +945,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) set_bit(GIF_QD_LOCKED, &ip->i_flags); else { while (x--) - gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); + gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); gfs2_quota_unhold(ip); } @@ -1000,7 +990,7 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; @@ -1008,14 +998,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; - for (x = 0; x < al->al_qd_num; x++) { + for (x = 0; x < qa->qa_qd_num; x++) { struct gfs2_quota_data *qd; int sync; - qd = al->al_qd[x]; + qd = qa->qa_qd[x]; sync = need_sync(qd); - gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); + gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); if (sync && qd_trylock(qd)) qda[count++] = qd; @@ -1048,7 +1038,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; s64 value; unsigned int x; @@ -1060,8 +1050,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - for (x = 0; x < al->al_qd_num; x++) { - qd = al->al_qd[x]; + for (x = 0; x < qa->qa_qd_num; x++) { + qd = qa->qa_qd[x]; if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) @@ -1099,7 +1089,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid) { - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; unsigned int x; @@ -1108,8 +1098,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - for (x = 0; x < al->al_qd_num; x++) { - qd = al->al_qd[x]; + for (x = 0; x < qa->qa_qd_num; x++) { + qd = qa->qa_qd[x]; if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { @@ -1427,8 +1417,8 @@ int gfs2_quotad(void *data) /* Check for & recover partially truncated inodes */ quotad_check_trunc_list(sdp); - if (freezing(current)) - refrigerator(); + try_to_freeze(); + t = min(quotad_timeo, statfs_timeo); prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); @@ -1529,7 +1519,6 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, unsigned int data_blocks, ind_blocks; unsigned int blocks = 0; int alloc_required; - struct gfs2_alloc *al; loff_t offset; int error; @@ -1594,15 +1583,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { - al = gfs2_alloc_get(ip); - if (al == NULL) - goto out_i; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); - blocks = al->al_requested = 1 + data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); + blocks = 1 + data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, blocks); if (error) - goto out_alloc; + goto out_i; blocks += gfs2_rg_blocks(ip); } @@ -1617,11 +1603,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, gfs2_trans_end(sdp); out_release: - if (alloc_required) { + if (alloc_required) gfs2_inplace_release(ip); -out_alloc: - gfs2_alloc_put(ip); - } out_i: gfs2_glock_dq_uninit(&i_gh); out_q: diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f2a02edcac8f..963b2d75200c 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, char env_status[20]; char *envp[] = { env_jid, env_status, NULL }; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + ls->ls_recover_jid_done = jid; ls->ls_recover_jid_status = message; sprintf(env_jid, "JID=%d", jid); sprintf(env_status, "RECOVERY=%s", message == LM_RD_SUCCESS ? "Done" : "Failed"); kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); + + if (sdp->sd_lockstruct.ls_ops->lm_recovery_result) + sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message); } void gfs2_recover_func(struct work_struct *work) @@ -512,7 +516,9 @@ void gfs2_recover_func(struct work_struct *work) if (error) goto fail_gunlock_ji; - if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { + if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) { + ro = 1; + } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) ro = 1; } else { @@ -577,6 +583,7 @@ fail_gunlock_j: fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); fail: + jd->jd_recover_error = error; gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); @@ -605,6 +612,6 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); - return 0; + return wait ? jd->jd_recover_error : 0; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 96bd6d759f29..981bfa32121a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -65,8 +65,8 @@ static const char valid_change[16] = { }; static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, unsigned char new_state, - unsigned int *n); + unsigned char old_state, + struct gfs2_bitmap **rbi); /** * gfs2_setbit - Set a bit in the bitmaps @@ -860,22 +860,36 @@ fail: } /** - * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode + * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode * @ip: the incore GFS2 inode structure * - * Returns: the struct gfs2_alloc + * Returns: the struct gfs2_qadata */ -struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) +struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int error; - BUG_ON(ip->i_alloc != NULL); - ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); + BUG_ON(ip->i_qadata != NULL); + ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); error = gfs2_rindex_update(sdp); if (error) fs_warn(sdp, "rindex update returns %d\n", error); - return ip->i_alloc; + return ip->i_qadata; +} + +/** + * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode + * @ip: the incore GFS2 inode structure + * + * Returns: the struct gfs2_qadata + */ + +static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip) +{ + BUG_ON(ip->i_res != NULL); + ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS); + return ip->i_res; } /** @@ -890,15 +904,20 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) { - const struct gfs2_alloc *al = ip->i_alloc; + const struct gfs2_blkreserv *rs = ip->i_res; if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; - if (rgd->rd_free_clone >= al->al_requested) + if (rgd->rd_free_clone >= rs->rs_requested) return 1; return 0; } +static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) +{ + return (bi->bi_start * GFS2_NBBY) + blk; +} + /** * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes * @rgd: The rgrp @@ -912,20 +931,20 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip u32 goal = 0, block; u64 no_addr; struct gfs2_sbd *sdp = rgd->rd_sbd; - unsigned int n; struct gfs2_glock *gl; struct gfs2_inode *ip; int error; int found = 0; + struct gfs2_bitmap *bi; while (goal < rgd->rd_data) { down_write(&sdp->sd_log_flush_lock); - n = 1; - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, - GFS2_BLKST_UNLINKED, &n); + block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); up_write(&sdp->sd_log_flush_lock); if (block == BFITNOENT) break; + + block = gfs2_bi2rgd_blk(bi, block); /* rgblk_search can return a block < goal, so we need to keep it marching forward. */ no_addr = block + rgd->rd_data0; @@ -977,8 +996,8 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; - struct gfs2_alloc *al = ip->i_alloc; - int error, rg_locked; + struct gfs2_blkreserv *rs = ip->i_res; + int error, rg_locked, flags = LM_FLAG_TRY; int loops = 0; if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) @@ -997,7 +1016,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) error = 0; } else { error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_TRY, &al->al_rgd_gh); + flags, &rs->rs_rgd_gh); } switch (error) { case 0: @@ -1008,12 +1027,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) if (rgd->rd_flags & GFS2_RDF_CHECK) try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); if (!rg_locked) - gfs2_glock_dq_uninit(&al->al_rgd_gh); + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); - if (rgd == begin) + if (rgd == begin) { + flags = 0; loops++; + } break; default: return error; @@ -1023,6 +1044,13 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) return -ENOSPC; } +static void gfs2_blkrsv_put(struct gfs2_inode *ip) +{ + BUG_ON(ip->i_res == NULL); + kfree(ip->i_res); + ip->i_res = NULL; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1030,16 +1058,23 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_blkreserv *rs; int error = 0; u64 last_unlinked = NO_BLOCK; int tries = 0; - if (gfs2_assert_warn(sdp, al->al_requested)) - return -EINVAL; + rs = gfs2_blkrsv_get(ip); + if (!rs) + return -ENOMEM; + + rs->rs_requested = requested; + if (gfs2_assert_warn(sdp, requested)) { + error = -EINVAL; + goto out; + } do { error = get_local_rgrp(ip, &last_unlinked); @@ -1056,6 +1091,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip) gfs2_log_flush(sdp, NULL); } while (tries++ < 3); +out: + if (error) + gfs2_blkrsv_put(ip); return error; } @@ -1068,10 +1106,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip) void gfs2_inplace_release(struct gfs2_inode *ip) { - struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_blkreserv *rs = ip->i_res; - if (al->al_rgd_gh.gh_gl) - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (rs->rs_rgd_gh.gh_gl) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + gfs2_blkrsv_put(ip); } /** @@ -1108,39 +1147,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) } /** - * rgblk_search - find a block in @old_state, change allocation - * state to @new_state + * rgblk_search - find a block in @state * @rgd: the resource group descriptor * @goal: the goal block within the RG (start here to search for avail block) - * @old_state: GFS2_BLKST_XXX the before-allocation state to find - * @new_state: GFS2_BLKST_XXX the after-allocation block state - * @n: The extent length + * @state: GFS2_BLKST_XXX the before-allocation state to find + * @dinode: TRUE if the first block we allocate is for a dinode + * @rbi: address of the pointer to the bitmap containing the block found * - * Walk rgrp's bitmap to find bits that represent a block in @old_state. - * Add the found bitmap buffer to the transaction. - * Set the found bits to @new_state to change block's allocation state. + * Walk rgrp's bitmap to find bits that represent a block in @state. * * This function never fails, because we wouldn't call it unless we * know (from reservation results, etc.) that a block is available. * - * Scope of @goal and returned block is just within rgrp, not the whole - * filesystem. + * Scope of @goal is just within rgrp, not the whole filesystem. + * Scope of @returned block is just within bitmap, not the whole filesystem. * - * Returns: the block number allocated + * Returns: the block number found relative to the bitmap rbi */ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, unsigned char new_state, - unsigned int *n) + unsigned char state, + struct gfs2_bitmap **rbi) { struct gfs2_bitmap *bi = NULL; const u32 length = rgd->rd_length; u32 blk = BFITNOENT; unsigned int buf, x; - const unsigned int elen = *n; const u8 *buffer = NULL; - *n = 0; + *rbi = NULL; /* Find bitmap block that contains bits for goal block */ for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; @@ -1163,21 +1198,21 @@ do_search: bi = rgd->rd_bits + buf; if (test_bit(GBF_FULL, &bi->bi_flags) && - (old_state == GFS2_BLKST_FREE)) + (state == GFS2_BLKST_FREE)) goto skip; /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ buffer = bi->bi_bh->b_data + bi->bi_offset; WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) + if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state); + blk = gfs2_bitfit(buffer, bi->bi_len, goal, state); if (blk != BFITNOENT) break; - if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) + if ((goal == 0) && (state == GFS2_BLKST_FREE)) set_bit(GBF_FULL, &bi->bi_flags); /* Try next bitmap block (wrap back to rgrp header if at end) */ @@ -1187,16 +1222,37 @@ skip: goal = 0; } - if (blk == BFITNOENT) - return blk; + if (blk != BFITNOENT) + *rbi = bi; - *n = 1; - if (old_state == new_state) - goto out; + return blk; +} + +/** + * gfs2_alloc_extent - allocate an extent from a given bitmap + * @rgd: the resource group descriptor + * @bi: the bitmap within the rgrp + * @blk: the block within the bitmap + * @dinode: TRUE if the first block we allocate is for a dinode + * @n: The extent length + * + * Add the found bitmap buffer to the transaction. + * Set the found bits to @new_state to change block's allocation state. + * Returns: starting block number of the extent (fs scope) + */ +static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, + u32 blk, bool dinode, unsigned int *n) +{ + const unsigned int elen = *n; + u32 goal; + const u8 *buffer = NULL; + *n = 0; + buffer = bi->bi_bh->b_data + bi->bi_offset; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi, blk, new_state); + bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + (*n)++; goal = blk; while (*n < elen) { goal++; @@ -1206,11 +1262,12 @@ skip: GFS2_BLKST_FREE) break; gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi, goal, new_state); + bi, goal, GFS2_BLKST_USED); (*n)++; } -out: - return (bi->bi_start * GFS2_NBBY) + blk; + blk = gfs2_bi2rgd_blk(bi, blk); + rgd->rd_last_alloc = blk + *n - 1; + return rgd->rd_data0 + blk; } /** @@ -1298,121 +1355,93 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** - * gfs2_alloc_block - Allocate one or more blocks + * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number - * @n: requested number of blocks/extent length (value/result) + * @ndata: requested number of blocks/extent length (value/result) + * @dinode: 1 if we're allocating a dinode block, else 0 + * @generation: the generation number of the inode * * Returns: 0 or error */ -int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) +int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, + bool dinode, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd; - u32 goal, blk; - u64 block; + unsigned int ndata; + u32 goal, blk; /* block, within the rgrp scope */ + u64 block; /* block, within the file system scope */ int error; + struct gfs2_bitmap *bi; /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (al == NULL) + if (ip->i_res == NULL) return -ECANCELED; rgd = ip->i_rgd; - if (rgrp_contains_block(rgd, ip->i_goal)) + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) goal = ip->i_goal - rgd->rd_data0; else goal = rgd->rd_last_alloc; - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); /* Since all blocks are reserved in advance, this shouldn't happen */ if (blk == BFITNOENT) goto rgrp_error; - rgd->rd_last_alloc = blk; - block = rgd->rd_data0 + blk; - ip->i_goal = block + *n - 1; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error == 0) { - struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); - brelse(dibh); + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + ndata = *nblocks; + if (dinode) + ndata--; + + if (!dinode) { + ip->i_goal = block + ndata - 1; + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error == 0) { + struct gfs2_dinode *di = + (struct gfs2_dinode *)dibh->b_data; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + di->di_goal_meta = di->di_goal_data = + cpu_to_be64(ip->i_goal); + brelse(dibh); + } } - if (rgd->rd_free < *n) + if (rgd->rd_free < *nblocks) goto rgrp_error; - rgd->rd_free -= *n; - - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - - al->al_alloced += *n; - - gfs2_statfs_change(sdp, 0, -(s64)*n, 0); - gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); - - rgd->rd_free_clone -= *n; - trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); - *bn = block; - return 0; - -rgrp_error: - gfs2_rgrp_error(rgd); - return -EIO; -} - -/** - * gfs2_alloc_di - Allocate a dinode - * @dip: the directory that the inode is going in - * @bn: the block number which is allocated - * @generation: the generation number of the inode - * - * Returns: 0 on success or error - */ - -int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_alloc *al = dip->i_alloc; - struct gfs2_rgrpd *rgd = dip->i_rgd; - u32 blk; - u64 block; - unsigned int n = 1; - - blk = rgblk_search(rgd, rgd->rd_last_alloc, - GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); - - /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) - goto rgrp_error; - - rgd->rd_last_alloc = blk; - block = rgd->rd_data0 + blk; - if (rgd->rd_free == 0) - goto rgrp_error; - - rgd->rd_free--; - rgd->rd_dinodes++; - *generation = rgd->rd_igeneration++; - if (*generation == 0) + rgd->rd_free -= *nblocks; + if (dinode) { + rgd->rd_dinodes++; *generation = rgd->rd_igeneration++; + if (*generation == 0) + *generation = rgd->rd_igeneration++; + } + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - al->al_alloced++; + gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); + if (dinode) + gfs2_trans_add_unrevoke(sdp, block, 1); - gfs2_statfs_change(sdp, 0, -1, +1); - gfs2_trans_add_unrevoke(sdp, block, 1); + /* + * This needs reviewing to see why we cannot do the quota change + * at this point in the dinode case. + */ + if (ndata) + gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, + ip->i_inode.i_gid); - rgd->rd_free_clone--; - trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); + rgd->rd_free_clone -= *nblocks; + trace_gfs2_block_alloc(ip, block, *nblocks, + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index cf5c50180192..ceec9106cdf4 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -28,19 +28,19 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); -extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); -static inline void gfs2_alloc_put(struct gfs2_inode *ip) +extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip); +static inline void gfs2_qadata_put(struct gfs2_inode *ip) { - BUG_ON(ip->i_alloc == NULL); - kfree(ip->i_alloc); - ip->i_alloc = NULL; + BUG_ON(ip->i_qadata == NULL); + kfree(ip->i_qadata); + ip->i_qadata = NULL; } -extern int gfs2_inplace_reserve(struct gfs2_inode *ip); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); extern void gfs2_inplace_release(struct gfs2_inode *ip); -extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); -extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); +extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, + bool dinode, u64 *generation); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 71e420989f77..4553ce515f62 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1284,18 +1284,18 @@ static int is_ancestor(const struct dentry *d1, const struct dentry *d2) /** * gfs2_show_options - Show mount options for /proc/mounts * @s: seq_file structure - * @mnt: vfsmount + * @root: root of this (sub)tree * * Returns: 0 on success or error code */ -static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) +static int gfs2_show_options(struct seq_file *s, struct dentry *root) { - struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; + struct gfs2_sbd *sdp = root->d_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; int val; - if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) + if (is_ancestor(root, sdp->sd_master_dir)) seq_printf(s, ",meta"); if (args->ar_lockproto[0]) seq_printf(s, ",lockproto=%s", args->ar_lockproto); @@ -1399,8 +1399,9 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) static int gfs2_dinode_dealloc(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al; + struct gfs2_qadata *qa; struct gfs2_rgrpd *rgd; + struct gfs2_holder gh; int error; if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { @@ -1408,8 +1409,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) return -EIO; } - al = gfs2_alloc_get(ip); - if (!al) + qa = gfs2_qadata_get(ip); + if (!qa) return -ENOMEM; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); @@ -1423,8 +1424,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) goto out_qs; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, - &al->al_rgd_gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); if (error) goto out_qs; @@ -1440,11 +1440,11 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) gfs2_trans_end(sdp); out_rg_gunlock: - gfs2_glock_dq_uninit(&al->al_rgd_gh); + gfs2_glock_dq_uninit(&gh); out_qs: gfs2_quota_unhold(ip); out: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); return error; } @@ -1582,7 +1582,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) static void gfs2_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(gfs2_inode_cachep, inode); } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 443cabcfcd23..d33172c291ba 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) ssize_t ret; int val = 0; - if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags)) + if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) val = 1; ret = sprintf(buf, "%d\n", val); return ret; @@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) val = simple_strtol(buf, NULL, 0); if (val == 1) - set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); else if (val == 0) { - clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); smp_mb__after_clear_bit(); gfs2_glock_thaw(sdp); } else { @@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) goto out; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) goto out; - sdp->sd_lockstruct.ls_first = first; - rv = 0; + sdp->sd_lockstruct.ls_first = first; + rv = 0; out: spin_unlock(&sdp->sd_jindex_spin); return rv ? rv : len; @@ -360,19 +360,14 @@ out: static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_first_done); + return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); } -static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) { - unsigned jid; struct gfs2_jdesc *jd; int rv; - rv = sscanf(buf, "%u", &jid); - if (rv != 1) - return -EINVAL; - rv = -ESHUTDOWN; spin_lock(&sdp->sd_jindex_spin); if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) @@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) } out: spin_unlock(&sdp->sd_jindex_spin); + return rv; +} + +static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned jid; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + + rv = gfs2_recover_set(sdp, jid); + return rv ? rv : len; } diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h index e94560e836d7..79182d6ad6ac 100644 --- a/fs/gfs2/sys.h +++ b/fs/gfs2/sys.h @@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp); int gfs2_sys_init(void); void gfs2_sys_uninit(void); +int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid); + #endif /* __SYS_DOT_H__ */ diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index f8f101ef600c..125d4572e1c0 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -30,9 +30,9 @@ struct gfs2_glock; * block, or all of the blocks in the rg, whichever is smaller */ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) { - const struct gfs2_alloc *al = ip->i_alloc; - if (al->al_requested < ip->i_rgd->rd_length) - return al->al_requested + 1; + const struct gfs2_blkreserv *rs = ip->i_res; + if (rs->rs_requested < ip->i_rgd->rd_length) + return rs->rs_requested + 1; return ip->i_rgd->rd_length; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 71d7bf830c09..e9636591b5d5 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -321,11 +321,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, int leave) { - struct gfs2_alloc *al; + struct gfs2_qadata *qa; int error; - al = gfs2_alloc_get(ip); - if (!al) + qa = gfs2_qadata_get(ip); + if (!qa) return -ENOMEM; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); @@ -336,7 +336,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, gfs2_quota_unhold(ip); out_alloc: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); return error; } @@ -549,9 +549,10 @@ int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata) goto out; error = gfs2_ea_get_copy(ip, &el, data, len); - if (error == 0) - error = len; - *ppdata = data; + if (error < 0) + kfree(data); + else + *ppdata = data; out: brelse(el.el_bh); return error; @@ -609,7 +610,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) u64 block; int error; - error = gfs2_alloc_block(ip, &block, &n); + error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); if (error) return error; gfs2_trans_add_unrevoke(sdp, block, 1); @@ -671,7 +672,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - error = gfs2_alloc_block(ip, &block, &n); + error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); if (error) return error; gfs2_trans_add_unrevoke(sdp, block, 1); @@ -708,21 +709,19 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { - struct gfs2_alloc *al; + struct gfs2_qadata *qa; struct buffer_head *dibh; int error; - al = gfs2_alloc_get(ip); - if (!al) + qa = gfs2_qadata_get(ip); + if (!qa) return -ENOMEM; error = gfs2_quota_lock_check(ip); if (error) goto out; - al->al_requested = blks; - - error = gfs2_inplace_reserve(ip); + error = gfs2_inplace_reserve(ip, blks); if (error) goto out_gunlock_q; @@ -751,7 +750,7 @@ out_ipres: out_gunlock_q: gfs2_quota_unlock(ip); out: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); return error; } @@ -991,7 +990,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - error = gfs2_alloc_block(ip, &blk, &n); + error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL); if (error) return error; gfs2_trans_add_unrevoke(sdp, blk, 1); @@ -1435,9 +1434,9 @@ out: static int ea_dealloc_block(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd; struct buffer_head *dibh; + struct gfs2_holder gh; int error; rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); @@ -1446,8 +1445,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) return -EIO; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, - &al->al_rgd_gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); if (error) return error; @@ -1471,7 +1469,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_uninit(&al->al_rgd_gh); + gfs2_glock_dq_uninit(&gh); return error; } @@ -1484,11 +1482,11 @@ out_gunlock: int gfs2_ea_dealloc(struct gfs2_inode *ip) { - struct gfs2_alloc *al; + struct gfs2_qadata *qa; int error; - al = gfs2_alloc_get(ip); - if (!al) + qa = gfs2_qadata_get(ip); + if (!qa) return -ENOMEM; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); @@ -1510,7 +1508,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) out_quota: gfs2_quota_unhold(ip); out_alloc: - gfs2_alloc_put(ip); + gfs2_qadata_put(ip); return error; } |