diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 345 |
1 files changed, 196 insertions, 149 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 82df3ce9874a..4da228a0e6d0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -50,8 +50,8 @@ */ #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ due to ENOSPC */ -#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ -#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ +#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ +#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ @@ -143,6 +143,7 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, { if (path->p_bh) { /* path points to block */ + BUFFER_TRACE(path->p_bh, "get_write_access"); return ext4_journal_get_write_access(handle, path->p_bh); } /* path points to leaf/index in inode body */ @@ -524,7 +525,7 @@ __read_extent_tree_block(const char *function, unsigned int line, lblk - prev, ~0, EXTENT_STATUS_HOLE); - if (ext4_ext_is_uninitialized(ex)) + if (ext4_ext_is_unwritten(ex)) status = EXTENT_STATUS_UNWRITTEN; ext4_es_cache_extent(inode, lblk, len, ext4_ext_pblock(ex), status); @@ -620,7 +621,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) } else if (path->p_ext) { ext_debug(" %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), - ext4_ext_is_uninitialized(path->p_ext), + ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext), ext4_ext_pblock(path->p_ext)); } else @@ -646,7 +647,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); } ext_debug("\n"); @@ -677,7 +678,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), newblock); ex++; @@ -802,7 +803,7 @@ ext4_ext_binsearch(struct inode *inode, ext_debug(" -> %d:%llu:[%d]%d ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_pblock(path->p_ext), - ext4_ext_is_uninitialized(path->p_ext), + ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext)); #ifdef CHECK_BINSEARCH @@ -1686,11 +1687,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, /* * Make sure that both extents are initialized. We don't merge - * uninitialized extents so that we can be sure that end_io code has + * unwritten extents so that we can be sure that end_io code has * the extent that was written properly split out and conversion to * initialized is trivial. */ - if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) + if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) return 0; ext1_ee_len = ext4_ext_get_actual_len(ex1); @@ -1707,10 +1708,10 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, */ if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; - if (ext4_ext_is_uninitialized(ex1) && + if (ext4_ext_is_unwritten(ex1) && (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || atomic_read(&EXT4_I(inode)->i_unwritten) || - (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) + (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN))) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -1735,7 +1736,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, { struct ext4_extent_header *eh; unsigned int depth, len; - int merge_done = 0, uninit; + int merge_done = 0, unwritten; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); @@ -1745,11 +1746,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */ - uninit = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(ex + 1)); - if (uninit) - ext4_ext_mark_uninitialized(ex); + if (unwritten) + ext4_ext_mark_unwritten(ex); if (ex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - ex - 1) @@ -1903,7 +1904,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *npath = NULL; int depth, len, err; ext4_lblk_t next; - int mb_flags = 0, uninit; + int mb_flags = 0, unwritten; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1943,21 +1944,21 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, if (ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append [%d]%d block to %u:[%d]%d" "(from %llu)\n", - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) return err; - uninit = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninit) - ext4_ext_mark_uninitialized(ex); + if (unwritten) + ext4_ext_mark_unwritten(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1969,10 +1970,10 @@ prepend: ext_debug("prepend %u[%d]%d block to %u:[%d]%d" "(from %llu)\n", le32_to_cpu(newext->ee_block), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, @@ -1980,13 +1981,13 @@ prepend: if (err) return err; - uninit = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); ex->ee_block = newext->ee_block; ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninit) - ext4_ext_mark_uninitialized(ex); + if (unwritten) + ext4_ext_mark_unwritten(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -2046,7 +2047,7 @@ has_space: ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext)); nearex = EXT_FIRST_EXTENT(eh); } else { @@ -2057,7 +2058,7 @@ has_space: "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), nearex); nearex++; @@ -2068,7 +2069,7 @@ has_space: "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), nearex); } @@ -2078,7 +2079,7 @@ has_space: "move %d extents from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), len, nearex, nearex + 1); memmove(nearex + 1, nearex, @@ -2200,7 +2201,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, es.es_lblk = le32_to_cpu(ex->ee_block); es.es_len = ext4_ext_get_actual_len(ex); es.es_pblk = ext4_ext_pblock(ex); - if (ext4_ext_is_uninitialized(ex)) + if (ext4_ext_is_unwritten(ex)) flags |= FIEMAP_EXTENT_UNWRITTEN; } @@ -2576,7 +2577,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned num; ext4_lblk_t ex_ee_block; unsigned short ex_ee_len; - unsigned uninitialized = 0; + unsigned unwritten = 0; struct ext4_extent *ex; ext4_fsblk_t pblk; @@ -2623,13 +2624,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; + if (ext4_ext_is_unwritten(ex)) + unwritten = 1; else - uninitialized = 0; + unwritten = 0; ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, - uninitialized, ex_ee_len); + unwritten, ex_ee_len); path[depth].p_ext = ex; a = ex_ee_block > start ? ex_ee_block : start; @@ -2701,11 +2702,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex->ee_len = cpu_to_le16(num); /* - * Do not mark uninitialized if all the blocks in the + * Do not mark unwritten if all the blocks in the * extent have been removed. */ - if (uninitialized && num) - ext4_ext_mark_uninitialized(ex); + if (unwritten && num) + ext4_ext_mark_unwritten(ex); /* * If the extent was completely released, * we need to remove it from the leaf @@ -2854,9 +2855,9 @@ again: end < ee_block + ext4_ext_get_actual_len(ex) - 1) { int split_flag = 0; - if (ext4_ext_is_uninitialized(ex)) - split_flag = EXT4_EXT_MARK_UNINIT1 | - EXT4_EXT_MARK_UNINIT2; + if (ext4_ext_is_unwritten(ex)) + split_flag = EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2; /* * Split the extent in two so that 'end' is the last @@ -3113,7 +3114,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * @path: the path to the extent * @split: the logical block where the extent is splitted. * @split_flags: indicates if the extent could be zeroout if split fails, and - * the states(init or uninit) of new extents. + * the states(init or unwritten) of new extents. * @flags: flags used to insert new extent to extent tree. * * @@ -3155,10 +3156,10 @@ static int ext4_split_extent_at(handle_t *handle, newblock = split - ee_block + ext4_ext_pblock(ex); BUG_ON(split < ee_block || split >= (ee_block + ee_len)); - BUG_ON(!ext4_ext_is_uninitialized(ex) && + BUG_ON(!ext4_ext_is_unwritten(ex) && split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_MARK_UNINIT1 | - EXT4_EXT_MARK_UNINIT2)); + EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3170,8 +3171,8 @@ static int ext4_split_extent_at(handle_t *handle, * then we just change the state of the extent, and splitting * is not needed. */ - if (split_flag & EXT4_EXT_MARK_UNINIT2) - ext4_ext_mark_uninitialized(ex); + if (split_flag & EXT4_EXT_MARK_UNWRIT2) + ext4_ext_mark_unwritten(ex); else ext4_ext_mark_initialized(ex); @@ -3185,8 +3186,8 @@ static int ext4_split_extent_at(handle_t *handle, /* case a */ memcpy(&orig_ex, ex, sizeof(orig_ex)); ex->ee_len = cpu_to_le16(split - ee_block); - if (split_flag & EXT4_EXT_MARK_UNINIT1) - ext4_ext_mark_uninitialized(ex); + if (split_flag & EXT4_EXT_MARK_UNWRIT1) + ext4_ext_mark_unwritten(ex); /* * path may lead to new leaf, not to original leaf any more @@ -3200,8 +3201,8 @@ static int ext4_split_extent_at(handle_t *handle, ex2->ee_block = cpu_to_le32(split); ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); ext4_ext_store_pblock(ex2, newblock); - if (split_flag & EXT4_EXT_MARK_UNINIT2) - ext4_ext_mark_uninitialized(ex2); + if (split_flag & EXT4_EXT_MARK_UNWRIT2) + ext4_ext_mark_unwritten(ex2); err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { @@ -3278,7 +3279,7 @@ static int ext4_split_extent(handle_t *handle, struct ext4_extent *ex; unsigned int ee_len, depth; int err = 0; - int uninitialized; + int unwritten; int split_flag1, flags1; int allocated = map->m_len; @@ -3286,14 +3287,14 @@ static int ext4_split_extent(handle_t *handle, ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - uninitialized = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); if (map->m_lblk + map->m_len < ee_block + ee_len) { split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; - if (uninitialized) - split_flag1 |= EXT4_EXT_MARK_UNINIT1 | - EXT4_EXT_MARK_UNINIT2; + if (unwritten) + split_flag1 |= EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2; if (split_flag & EXT4_EXT_DATA_VALID2) split_flag1 |= EXT4_EXT_DATA_VALID1; err = ext4_split_extent_at(handle, inode, path, @@ -3313,15 +3314,20 @@ static int ext4_split_extent(handle_t *handle, return PTR_ERR(path); depth = ext_depth(inode); ex = path[depth].p_ext; - uninitialized = ext4_ext_is_uninitialized(ex); + if (!ex) { + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", + (unsigned long) map->m_lblk); + return -EIO; + } + unwritten = ext4_ext_is_unwritten(ex); split_flag1 = 0; if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; - if (uninitialized) { - split_flag1 |= EXT4_EXT_MARK_UNINIT1; + if (unwritten) { + split_flag1 |= EXT4_EXT_MARK_UNWRIT1; split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_MARK_UNINIT2); + EXT4_EXT_MARK_UNWRIT2); } err = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); @@ -3336,16 +3342,16 @@ out: /* * This function is called by ext4_ext_map_blocks() if someone tries to write - * to an uninitialized extent. It may result in splitting the uninitialized + * to an unwritten extent. It may result in splitting the unwritten * extent into multiple extents (up to three - one initialized and two - * uninitialized). + * unwritten). * There are three possibilities: * a> There is no split required: Entire extent should be initialized * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * Pre-conditions: - * - The extent pointed to by 'path' is uninitialized. + * - The extent pointed to by 'path' is unwritten. * - The extent pointed to by 'path' contains a superset * of the logical span [map->m_lblk, map->m_lblk + map->m_len). * @@ -3391,12 +3397,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); /* Pre-conditions */ - BUG_ON(!ext4_ext_is_uninitialized(ex)); + BUG_ON(!ext4_ext_is_unwritten(ex)); BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); /* * Attempt to transfer newly initialized blocks from the currently - * uninitialized extent to its neighbor. This is much cheaper + * unwritten extent to its neighbor. This is much cheaper * than an insertion followed by a merge as those involve costly * memmove() calls. Transferring to the left is the common case in * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE) @@ -3432,7 +3438,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * - C4: abut_ex can receive the additional blocks without * overflowing the (initialized) length limit. */ - if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ + if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/ ((prev_lblk + prev_len) == ee_block) && /*C2*/ ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ @@ -3447,7 +3453,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_block = cpu_to_le32(ee_block + map_len); ext4_ext_store_pblock(ex, ee_pblk + map_len); ex->ee_len = cpu_to_le16(ee_len - map_len); - ext4_ext_mark_uninitialized(ex); /* Restore the flag */ + ext4_ext_mark_unwritten(ex); /* Restore the flag */ /* Extend abut_ex by 'map_len' blocks */ abut_ex->ee_len = cpu_to_le16(prev_len + map_len); @@ -3478,7 +3484,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * - C4: abut_ex can receive the additional blocks without * overflowing the (initialized) length limit. */ - if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ + if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/ ((map->m_lblk + map_len) == next_lblk) && /*C2*/ ((ee_pblk + ee_len) == next_pblk) && /*C3*/ (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ @@ -3493,7 +3499,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, abut_ex->ee_block = cpu_to_le32(next_lblk - map_len); ext4_ext_store_pblock(abut_ex, next_pblk - map_len); ex->ee_len = cpu_to_le16(ee_len - map_len); - ext4_ext_mark_uninitialized(ex); /* Restore the flag */ + ext4_ext_mark_unwritten(ex); /* Restore the flag */ /* Extend abut_ex by 'map_len' blocks */ abut_ex->ee_len = cpu_to_le16(next_len + map_len); @@ -3598,26 +3604,26 @@ out: /* * This function is called by ext4_ext_map_blocks() from * ext4_get_blocks_dio_write() when DIO to write - * to an uninitialized extent. + * to an unwritten extent. * - * Writing to an uninitialized extent may result in splitting the uninitialized - * extent into multiple initialized/uninitialized extents (up to three) + * Writing to an unwritten extent may result in splitting the unwritten + * extent into multiple initialized/unwritten extents (up to three) * There are three possibilities: - * a> There is no split required: Entire extent should be uninitialized + * a> There is no split required: Entire extent should be unwritten * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * This works the same way in the case of initialized -> unwritten conversion. * * One of more index blocks maybe needed if the extent tree grow after - * the uninitialized extent split. To prevent ENOSPC occur at the IO - * complete, we need to split the uninitialized extent before DIO submit - * the IO. The uninitialized extent called at this time will be split - * into three uninitialized extent(at most). After IO complete, the part + * the unwritten extent split. To prevent ENOSPC occur at the IO + * complete, we need to split the unwritten extent before DIO submit + * the IO. The unwritten extent called at this time will be split + * into three unwritten extent(at most). After IO complete, the part * being filled will be convert to initialized by the end_io callback function * via ext4_convert_unwritten_extents(). * - * Returns the size of uninitialized extent to be written on success. + * Returns the size of unwritten extent to be written on success. */ static int ext4_split_convert_extents(handle_t *handle, struct inode *inode, @@ -3655,7 +3661,7 @@ static int ext4_split_convert_extents(handle_t *handle, } else if (flags & EXT4_GET_BLOCKS_CONVERT) { split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); + split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); } flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags); @@ -3694,13 +3700,19 @@ static int ext4_convert_initialized_extents(handle_t *handle, } depth = ext_depth(inode); ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", + (unsigned long) map->m_lblk); + err = -EIO; + goto out; + } } err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - /* first mark the extent as uninitialized */ - ext4_ext_mark_uninitialized(ex); + /* first mark the extent as unwritten */ + ext4_ext_mark_unwritten(ex); /* note: ext4_ext_correct_indexes() isn't needed here because * borders are not changed @@ -3960,10 +3972,10 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, /* * Make sure that the extent is no bigger than we support with - * uninitialized extent + * unwritten extent */ - if (map->m_len > EXT_UNINIT_MAX_LEN) - map->m_len = EXT_UNINIT_MAX_LEN / 2; + if (map->m_len > EXT_UNWRITTEN_MAX_LEN) + map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; ret = ext4_convert_initialized_extents(handle, inode, map, path); @@ -3982,7 +3994,7 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, } static int -ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, +ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, unsigned int allocated, ext4_fsblk_t newblock) @@ -3991,23 +4003,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, int err = 0; ext4_io_end_t *io = ext4_inode_aio(inode); - ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " + ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical " "block %llu, max_blocks %u, flags %x, allocated %u\n", inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, flags, allocated); ext4_ext_show_leaf(inode, path); /* - * When writing into uninitialized space, we should not fail to + * When writing into unwritten space, we should not fail to * allocate metadata blocks for the new extent block if needed. */ flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; - trace_ext4_ext_handle_uninitialized_extents(inode, map, flags, + trace_ext4_ext_handle_unwritten_extents(inode, map, flags, allocated, newblock); /* get_block() before submit the IO, split the extent */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { + if (flags & EXT4_GET_BLOCKS_PRE_IO) { ret = ext4_split_convert_extents(handle, inode, map, path, flags | EXT4_GET_BLOCKS_CONVERT); if (ret <= 0) @@ -4022,12 +4034,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); map->m_flags |= EXT4_MAP_UNWRITTEN; - if (ext4_should_dioread_nolock(inode)) - map->m_flags |= EXT4_MAP_UNINIT; goto out; } /* IO end_io complete, convert the filled extent to written */ - if ((flags & EXT4_GET_BLOCKS_CONVERT)) { + if (flags & EXT4_GET_BLOCKS_CONVERT) { ret = ext4_convert_unwritten_extents_endio(handle, inode, map, path); if (ret >= 0) { @@ -4048,7 +4058,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * repeat fallocate creation request * we already have an unwritten extent */ - if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) { + if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { map->m_flags |= EXT4_MAP_UNWRITTEN; goto map_out; } @@ -4299,7 +4309,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* - * Uninitialized extents are treated as holes, except that + * unwritten extents are treated as holes, except that * we split out initialized portions during a write. */ ee_len = ext4_ext_get_actual_len(ex); @@ -4318,16 +4328,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * If the extent is initialized check whether the * caller wants to convert it to unwritten. */ - if ((!ext4_ext_is_uninitialized(ex)) && + if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { allocated = ext4_ext_convert_initialized_extent( handle, inode, map, path, flags, allocated, newblock); goto out2; - } else if (!ext4_ext_is_uninitialized(ex)) + } else if (!ext4_ext_is_unwritten(ex)) goto out; - ret = ext4_ext_handle_uninitialized_extents( + ret = ext4_ext_handle_unwritten_extents( handle, inode, map, path, flags, allocated, newblock); if (ret < 0) @@ -4399,15 +4409,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* * See if request is beyond maximum number of blocks we can have in * a single extent. For an initialized extent this limit is - * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is - * EXT_UNINIT_MAX_LEN. + * EXT_INIT_MAX_LEN and for an unwritten extent this limit is + * EXT_UNWRITTEN_MAX_LEN. */ if (map->m_len > EXT_INIT_MAX_LEN && - !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) + !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) map->m_len = EXT_INIT_MAX_LEN; - else if (map->m_len > EXT_UNINIT_MAX_LEN && - (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) - map->m_len = EXT_UNINIT_MAX_LEN; + else if (map->m_len > EXT_UNWRITTEN_MAX_LEN && + (flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) + map->m_len = EXT_UNWRITTEN_MAX_LEN; /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ newex.ee_len = cpu_to_le16(map->m_len); @@ -4455,21 +4465,19 @@ got_allocated_blocks: /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock + offset); newex.ee_len = cpu_to_le16(ar.len); - /* Mark uninitialized */ - if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ - ext4_ext_mark_uninitialized(&newex); + /* Mark unwritten */ + if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){ + ext4_ext_mark_unwritten(&newex); map->m_flags |= EXT4_MAP_UNWRITTEN; /* * io_end structure was created for every IO write to an - * uninitialized extent. To avoid unnecessary conversion, + * unwritten extent. To avoid unnecessary conversion, * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state * that we need to perform conversion when IO is done. */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) + if (flags & EXT4_GET_BLOCKS_PRE_IO) set_unwritten = 1; - if (ext4_should_dioread_nolock(inode)) - map->m_flags |= EXT4_MAP_UNINIT; } err = 0; @@ -4596,9 +4604,9 @@ got_allocated_blocks: /* * Cache the extent and update transaction to commit on fdatasync only - * when it is _not_ an uninitialized extent. + * when it is _not_ an unwritten extent. */ - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) + if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0) ext4_update_inode_fsync_trans(handle, inode, 1); else ext4_update_inode_fsync_trans(handle, inode, 0); @@ -4672,7 +4680,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, * that it doesn't get unnecessarily split into multiple * extents. */ - if (len <= EXT_UNINIT_MAX_LEN) + if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; /* @@ -4730,6 +4738,16 @@ static long ext4_zero_range(struct file *file, loff_t offset, trace_ext4_zero_range(inode, offset, len, mode); + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + /* Call ext4_force_commit to flush all data in case of data=journal. */ + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); + if (ret) + return ret; + } + /* * Write out all dirty pages to avoid race conditions * Then release them. @@ -4761,7 +4779,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, else max_blocks -= lblk; - flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | + flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -4878,9 +4896,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (mode & FALLOC_FL_PUNCH_HOLE) return ext4_punch_hole(inode, offset, len); - if (mode & FALLOC_FL_COLLAPSE_RANGE) - return ext4_collapse_range(inode, offset, len); - ret = ext4_convert_inline_data(inode); if (ret) return ret; @@ -4892,6 +4907,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; + if (mode & FALLOC_FL_COLLAPSE_RANGE) + return ext4_collapse_range(inode, offset, len); + if (mode & FALLOC_FL_ZERO_RANGE) return ext4_zero_range(file, offset, len, mode); @@ -4904,7 +4922,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - lblk; - flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; + flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -5229,18 +5247,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) update = 1; - *start = ex_last->ee_block + + *start = le32_to_cpu(ex_last->ee_block) + ext4_ext_get_actual_len(ex_last); while (ex_start <= ex_last) { - ex_start->ee_block -= shift; - if (ex_start > - EXT_FIRST_EXTENT(path[depth].p_hdr)) { - if (ext4_ext_try_to_merge_right(inode, - path, ex_start - 1)) - ex_last--; - } - ex_start++; + le32_add_cpu(&ex_start->ee_block, -shift); + /* Try to merge to the left. */ + if ((ex_start > + EXT_FIRST_EXTENT(path[depth].p_hdr)) && + ext4_ext_try_to_merge_right(inode, + path, ex_start - 1)) + ex_last--; + else + ex_start++; } err = ext4_ext_dirty(handle, inode, path + depth); if (err) @@ -5255,7 +5274,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (err) goto out; - path[depth].p_idx->ei_block -= shift; + le32_add_cpu(&path[depth].p_idx->ei_block, -shift); err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto out; @@ -5300,7 +5319,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, return ret; } - stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); + stop_block = le32_to_cpu(extent->ee_block) + + ext4_ext_get_actual_len(extent); ext4_ext_drop_refs(path); kfree(path); @@ -5313,10 +5333,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * enough to accomodate the shift. */ path = ext4_ext_find_extent(inode, start - 1, NULL, 0); + if (IS_ERR(path)) + return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; - ex_start = extent->ee_block; - ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); + if (extent) { + ex_start = le32_to_cpu(extent->ee_block); + ex_end = le32_to_cpu(extent->ee_block) + + ext4_ext_get_actual_len(extent); + } else { + ex_start = 0; + ex_end = 0; + } ext4_ext_drop_refs(path); kfree(path); @@ -5331,7 +5359,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; - current_block = extent->ee_block; + if (!extent) { + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", + (unsigned long) start); + return -EIO; + } + + current_block = le32_to_cpu(extent->ee_block); if (start > current_block) { /* Hole, move to the next extent */ ret = mext_next_extent(inode, path, &extent); @@ -5365,17 +5399,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_lblk_t punch_start, punch_stop; handle_t *handle; unsigned int credits; - loff_t new_size; + loff_t new_size, ioffset; int ret; - BUG_ON(offset + len > i_size_read(inode)); - /* Collapse range works only on fs block size aligned offsets. */ if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || len & (EXT4_BLOCK_SIZE(sb) - 1)) return -EINVAL; if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) return -EOPNOTSUPP; trace_ext4_collapse_range(inode, offset, len); @@ -5383,22 +5418,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); + /* Call ext4_force_commit to flush all data in case of data=journal. */ + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); + if (ret) + return ret; + } + + /* + * Need to round down offset to be aligned with page size boundary + * for page size > block size. + */ + ioffset = round_down(offset, PAGE_SIZE); + /* Write out all dirty pages */ - ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, + LLONG_MAX); if (ret) return ret; /* Take mutex lock */ mutex_lock(&inode->i_mutex); - /* It's not possible punch hole on append only file */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { - ret = -EPERM; - goto out_mutex; - } - - if (IS_SWAPFILE(inode)) { - ret = -ETXTBSY; + /* + * There is no need to overlap collapse range with EOF, in which case + * it is effectively a truncate operation + */ + if (offset + len >= i_size_read(inode)) { + ret = -EINVAL; goto out_mutex; } @@ -5408,7 +5455,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - truncate_pagecache_range(inode, offset, -1); + truncate_pagecache(inode, ioffset); /* Wait for existing dio to complete */ ext4_inode_block_unlocked_dio(inode); @@ -5425,7 +5472,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_discard_preallocations(inode); ret = ext4_es_remove_extent(inode, punch_start, - EXT_MAX_BLOCKS - punch_start - 1); + EXT_MAX_BLOCKS - punch_start); if (ret) { up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; @@ -5436,6 +5483,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } + ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start); @@ -5445,10 +5493,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) } new_size = i_size_read(inode) - len; - truncate_setsize(inode, new_size); + i_size_write(inode, new_size); EXT4_I(inode)->i_disksize = new_size; - ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); |