diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-19 01:50:28 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-19 01:50:28 +0200 |
commit | 3f80fbff5f1ef8a842bbe5343bbc9ddad883f25c (patch) | |
tree | 259f5be0f203b5c7e97837a1957f461abcfef5d0 | |
parent | Merge branch 'devicetree/merge' of git://git.secretlab.ca/git/linux-2.6 (diff) | |
parent | configfs: Fix race between configfs_readdir() and configfs_d_iput() (diff) | |
download | linux-3f80fbff5f1ef8a842bbe5343bbc9ddad883f25c.tar.xz linux-3f80fbff5f1ef8a842bbe5343bbc9ddad883f25c.zip |
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
configfs: Fix race between configfs_readdir() and configfs_d_iput()
configfs: Don't try to d_delete() negative dentries.
ocfs2/dlm: Target node death during resource migration leads to thread spin
ocfs2: Skip mount recovery for hard-ro mounts
ocfs2/cluster: Heartbeat mismatch message improved
ocfs2/cluster: Increase the live threshold for global heartbeat
ocfs2/dlm: Use negotiated o2dlm protocol version
ocfs2: skip existing hole when removing the last extent_rec in punching-hole codes.
ocfs2: Initialize data_ac (might be used uninitialized)
-rw-r--r-- | fs/configfs/dir.c | 39 | ||||
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 61 | ||||
-rw-r--r-- | fs/ocfs2/dir.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 12 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 3 |
7 files changed, 96 insertions, 27 deletions
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 3313dd19f543..9a37a9b6de3a 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock); static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { - struct configfs_dirent * sd = dentry->d_fsdata; + struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { BUG_ON(sd->s_dentry != dentry); + /* Coordinate with configfs_readdir */ + spin_lock(&configfs_dirent_lock); sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); @@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group, sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; } else { - d_delete(child); + BUG_ON(child->d_inode); + d_drop(child); dput(child); } } @@ -1545,7 +1549,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir struct configfs_dirent * parent_sd = dentry->d_fsdata; struct configfs_dirent *cursor = filp->private_data; struct list_head *p, *q = &cursor->s_sibling; - ino_t ino; + ino_t ino = 0; int i = filp->f_pos; switch (i) { @@ -1573,6 +1577,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir struct configfs_dirent *next; const char * name; int len; + struct inode *inode = NULL; next = list_entry(p, struct configfs_dirent, s_sibling); @@ -1581,9 +1586,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir name = configfs_get_name(next); len = strlen(name); - if (next->s_dentry) - ino = next->s_dentry->d_inode->i_ino; - else + + /* + * We'll have a dentry and an inode for + * PINNED items and for open attribute + * files. We lock here to prevent a race + * with configfs_d_iput() clearing + * s_dentry before calling iput(). + * + * Why do we go to the trouble? If + * someone has an attribute file open, + * the inode number should match until + * they close it. Beyond that, we don't + * care. + */ + spin_lock(&configfs_dirent_lock); + dentry = next->s_dentry; + if (dentry) + inode = dentry->d_inode; + if (inode) + ino = inode->i_ino; + spin_unlock(&configfs_dirent_lock); + if (!inode) ino = iunique(configfs_sb, 2); if (filldir(dirent, name, len, filp->f_pos, ino, @@ -1683,7 +1707,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = configfs_attach_group(sd->s_element, &group->cg_item, dentry); if (err) { - d_delete(dentry); + BUG_ON(dentry->d_inode); + d_drop(dentry); dput(dentry); } else { spin_lock(&configfs_dirent_lock); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 643720209a98..9a3e6bbff27b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg, /* We want to make sure that nobody is heartbeating on top of us -- * this will help detect an invalid configuration. */ -static int o2hb_check_last_timestamp(struct o2hb_region *reg) +static void o2hb_check_last_timestamp(struct o2hb_region *reg) { - int node_num, ret; struct o2hb_disk_slot *slot; struct o2hb_disk_heartbeat_block *hb_block; + char *errstr; - node_num = o2nm_this_node(); - - ret = 1; - slot = ®->hr_slots[node_num]; + slot = ®->hr_slots[o2nm_this_node()]; /* Don't check on our 1st timestamp */ - if (slot->ds_last_time) { - hb_block = slot->ds_raw_block; + if (!slot->ds_last_time) + return; - if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) - ret = 0; - } + hb_block = slot->ds_raw_block; + if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && + le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && + hb_block->hb_node == slot->ds_node_num) + return; - return ret; +#define ERRSTR1 "Another node is heartbeating on device" +#define ERRSTR2 "Heartbeat generation mismatch on device" +#define ERRSTR3 "Heartbeat sequence mismatch on device" + + if (hb_block->hb_node != slot->ds_node_num) + errstr = ERRSTR1; + else if (le64_to_cpu(hb_block->hb_generation) != + slot->ds_last_generation) + errstr = ERRSTR2; + else + errstr = ERRSTR3; + + mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), " + "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name, + slot->ds_node_num, (unsigned long long)slot->ds_last_generation, + (unsigned long long)slot->ds_last_time, hb_block->hb_node, + (unsigned long long)le64_to_cpu(hb_block->hb_generation), + (unsigned long long)le64_to_cpu(hb_block->hb_seq)); } static inline void o2hb_prepare_block(struct o2hb_region *reg, @@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) /* With an up to date view of the slots, we can check that no * other node has been improperly configured to heartbeat in * our slot. */ - if (!o2hb_check_last_timestamp(reg)) - mlog(ML_ERROR, "Device \"%s\": another node is heartbeating " - "in our slot!\n", reg->hr_dev_name); + o2hb_check_last_timestamp(reg); /* fill in the proper info for our next heartbeat */ o2hb_prepare_block(reg, reg->hr_generation); @@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) } i = -1; - while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { - + while((i = find_next_bit(configured_nodes, + O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { change |= o2hb_check_slot(reg, ®->hr_slots[i]); } @@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, struct file *filp = NULL; struct inode *inode = NULL; ssize_t ret = -EINVAL; + int live_threshold; if (reg->hr_bdev) goto out; @@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, * A node is considered live after it has beat LIVE_THRESHOLD * times. We're not steady until we've given them a chance * _after_ our first read. + * The default threshold is bare minimum so as to limit the delay + * during mounts. For global heartbeat, the threshold doubled for the + * first region. */ - atomic_set(®->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); + live_threshold = O2HB_LIVE_THRESHOLD; + if (o2hb_global_heartbeat_active()) { + spin_lock(&o2hb_live_lock); + if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) + live_threshold <<= 1; + spin_unlock(&o2hb_live_lock); + } + atomic_set(®->hr_steady_iterations, live_threshold + 1); hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", reg->hr_item.ci_name); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 9fe5b8fd658f..8582e3f4f120 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, bytes = blocks_wanted << sb->s_blocksize_bits; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(dir); - struct ocfs2_alloc_context *data_ac; + struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_alloc_context *meta_ac = NULL; struct buffer_head *dirdata_bh = NULL; struct buffer_head *dx_root_bh = NULL; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 7540a492eaba..3b179d6cbde0 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) spin_unlock(&dlm->spinlock); /* Support for global heartbeat and node info was added in 1.1 */ - if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { + if (dlm->dlm_locking_proto.pv_major > 1 || + dlm->dlm_locking_proto.pv_minor > 0) { status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); if (status) { mlog_errno(status); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index fede57ed005f..84d166328cf7 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2574,6 +2574,9 @@ fail: res->state &= ~DLM_LOCK_RES_MIGRATING; wake = 1; spin_unlock(&res->spinlock); + if (dlm_is_host_down(ret)) + dlm_wait_for_node_death(dlm, target, + DLM_NODE_DEATH_WAIT_MAX); goto leave; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41565ae52856..89659d6dc206 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); if (le32_to_cpu(rec->e_cpos) >= trunc_start) { + /* + * remove an entire extent record. + */ *trunc_cpos = le32_to_cpu(rec->e_cpos); /* * Skip holes if any. @@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, *blkno = le64_to_cpu(rec->e_blkno); *trunc_end = le32_to_cpu(rec->e_cpos); } else if (range > trunc_start) { + /* + * remove a partial extent record, which means we're + * removing the last extent record. + */ *trunc_cpos = trunc_start; + /* + * skip hole if any. + */ + if (range < *trunc_end) + *trunc_end = range; *trunc_len = *trunc_end - trunc_start; coff = trunc_start - le32_to_cpu(rec->e_cpos); *blkno = le64_to_cpu(rec->e_blkno) + diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b141a44605ca..295d56454e8b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) { struct ocfs2_journal *journal = osb->journal; + if (ocfs2_is_hard_readonly(osb)) + return; + /* No need to queue up our truncate_log as regular cleanup will catch * that */ ocfs2_queue_recovery_completion(journal, osb->slot_num, |