diff options
Diffstat (limited to '')
-rw-r--r-- | lib/lz4/lz4_decompress.c | 481 |
1 files changed, 341 insertions, 140 deletions
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 141734d255e4..0c9d3ad17e0f 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -43,30 +43,36 @@ /*-***************************** * Decompression functions *******************************/ -/* LZ4_decompress_generic() : - * This generic decompression function cover all use cases. - * It shall be instantiated several times, using different sets of directives - * Note that it is important this generic function is really inlined, + +#define DEBUGLOG(l, ...) {} /* disabled */ + +#ifndef assert +#define assert(condition) ((void)0) +#endif + +/* + * LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ static FORCE_INLINE int LZ4_decompress_generic( - const char * const source, - char * const dest, - int inputSize, + const char * const src, + char * const dst, + int srcSize, /* * If endOnInput == endOnInputSize, - * this value is the max size of Output Buffer. + * this value is `dstCapacity` */ int outputSize, /* endOnOutputSize, endOnInputSize */ - int endOnInput, + endCondition_directive endOnInput, /* full, partial */ - int partialDecoding, - /* only used if partialDecoding == partial */ - int targetOutputSize, + earlyEnd_directive partialDecoding, /* noDict, withPrefix64k, usingExtDict */ - int dict, - /* == dest when no prefix */ + dict_directive dict, + /* always <= dst, == dst when no prefix */ const BYTE * const lowPrefix, /* only if dict == usingExtDict */ const BYTE * const dictStart, @@ -74,35 +80,43 @@ static FORCE_INLINE int LZ4_decompress_generic( const size_t dictSize ) { - /* Local Variables */ - const BYTE *ip = (const BYTE *) source; - const BYTE * const iend = ip + inputSize; + const BYTE *ip = (const BYTE *) src; + const BYTE * const iend = ip + srcSize; - BYTE *op = (BYTE *) dest; + BYTE *op = (BYTE *) dst; BYTE * const oend = op + outputSize; BYTE *cpy; - BYTE *oexit = op + targetOutputSize; - const BYTE * const lowLimit = lowPrefix - dictSize; const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; - static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; - static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; + static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; const int safeDecode = (endOnInput == endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); + /* Set up the "end" pointers for the shortcut. */ + const BYTE *const shortiend = iend - + (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE *const shortoend = oend - + (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; + + DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__, + srcSize, outputSize); + /* Special cases */ - /* targetOutputSize too high => decode everything */ - if ((partialDecoding) && (oexit > oend - MFLIMIT)) - oexit = oend - MFLIMIT; + assert(lowPrefix <= op); + assert(src != NULL); /* Empty output buffer */ if ((endOnInput) && (unlikely(outputSize == 0))) - return ((inputSize == 1) && (*ip == 0)) ? 0 : -1; + return ((srcSize == 1) && (*ip == 0)) ? 0 : -1; if ((!endOnInput) && (unlikely(outputSize == 0))) return (*ip == 0 ? 1 : -1); + if ((endOnInput) && unlikely(srcSize == 0)) + return -1; + /* Main Loop : decode sequences */ while (1) { size_t length; @@ -111,12 +125,74 @@ static FORCE_INLINE int LZ4_decompress_generic( /* get literal length */ unsigned int const token = *ip++; - length = token>>ML_BITS; + /* ip < iend before the increment */ + assert(!endOnInput || ip <= iend); + + /* + * A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough + * space, enter the shortcut and copy 16 bytes on behalf + * of the literals (in the fast mode, only 8 bytes can be + * safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes + * in a similar manner; but we ensure that there's enough + * space in the output for those 18 bytes earlier, upon + * entering the shortcut (in other words, there is a + * combined check for both stages). + */ + if ((endOnInput ? length != RUN_MASK : length <= 8) + /* + * strictly "less than" on input, to re-enter + * the loop with at least one byte + */ + && likely((endOnInput ? ip < shortiend : 1) & + (op <= shortoend))) { + /* Copy the literals */ + memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* + * The second stage: + * prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. + */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ((length != ML_MASK) && + (offset >= 8) && + (dict == withPrefix64k || match >= lowPrefix)) { + /* Copy the match. */ + memcpy(op + 0, match + 0, 8); + memcpy(op + 8, match + 8, 8); + memcpy(op + 16, match + 16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* + * The second stage didn't work out, but the info + * is ready. Propel it right to the point of match + * copying. + */ + goto _copy_match; + } + + /* decode literal length */ if (length == RUN_MASK) { unsigned int s; + if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) { + /* overflow detection */ + goto _output_error; + } do { s = *ip++; length += s; @@ -125,14 +201,14 @@ static FORCE_INLINE int LZ4_decompress_generic( : 1) & (s == 255)); if ((safeDecode) - && unlikely( - (size_t)(op + length) < (size_t)(op))) { + && unlikely((uptrval)(op) + + length < (uptrval)(op))) { /* overflow detection */ goto _output_error; } if ((safeDecode) - && unlikely( - (size_t)(ip + length) < (size_t)(ip))) { + && unlikely((uptrval)(ip) + + length < (uptrval)(ip))) { /* overflow detection */ goto _output_error; } @@ -140,16 +216,19 @@ static FORCE_INLINE int LZ4_decompress_generic( /* copy literals */ cpy = op + length; - if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT)) + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + + if (((endOnInput) && ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS)))) || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) { if (partialDecoding) { if (cpy > oend) { /* - * Error : - * write attempt beyond end of output buffer + * Partial decoding : + * stop in the middle of literal segment */ - goto _output_error; + cpy = oend; + length = oend - op; } if ((endOnInput) && (ip + length > iend)) { @@ -184,29 +263,43 @@ static FORCE_INLINE int LZ4_decompress_generic( memcpy(op, ip, length); ip += length; op += length; + /* Necessarily EOF, due to parsing restrictions */ - break; + if (!partialDecoding || (cpy == oend)) + break; + } else { + /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + LZ4_wildCopy(op, ip, cpy); + ip += length; + op = cpy; } - LZ4_wildCopy(op, ip, cpy); - ip += length; - op = cpy; - /* get offset */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; - if ((checkOffset) && (unlikely(match < lowLimit))) { + /* get matchlength */ + length = token & ML_MASK; + +_copy_match: + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { /* Error : offset outside buffers */ goto _output_error; } /* costs ~1%; silence an msan warning when offset == 0 */ - LZ4_write32(op, (U32)offset); + /* + * note : when partialDecoding, there is no guarantee that + * at least 4 bytes remain available in output buffer + */ + if (!partialDecoding) { + assert(oend > op); + assert(oend - op >= 4); + + LZ4_write32(op, (U32)offset); + } - /* get matchlength */ - length = token & ML_MASK; if (length == ML_MASK) { unsigned int s; @@ -221,7 +314,7 @@ static FORCE_INLINE int LZ4_decompress_generic( if ((safeDecode) && unlikely( - (size_t)(op + length) < (size_t)op)) { + (uptrval)(op) + length < (uptrval)op)) { /* overflow detection */ goto _output_error; } @@ -229,24 +322,26 @@ static FORCE_INLINE int LZ4_decompress_generic( length += MINMATCH; - /* check external dictionary */ + /* match starting within external dictionary */ if ((dict == usingExtDict) && (match < lowPrefix)) { if (unlikely(op + length > oend - LASTLITERALS)) { /* doesn't respect parsing restriction */ - goto _output_error; + if (!partialDecoding) + goto _output_error; + length = min(length, (size_t)(oend - op)); } if (length <= (size_t)(lowPrefix - match)) { /* - * match can be copied as a single segment - * from external dictionary + * match fits entirely within external + * dictionary : just copy */ memmove(op, dictEnd - (lowPrefix - match), length); op += length; } else { /* - * match encompass external + * match stretches into both external * dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); @@ -254,7 +349,6 @@ static FORCE_INLINE int LZ4_decompress_generic( memcpy(op, dictEnd - copySize, copySize); op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE * const endOfMatch = op + restSize; @@ -267,23 +361,44 @@ static FORCE_INLINE int LZ4_decompress_generic( op += restSize; } } - continue; } /* copy match within block */ cpy = op + length; - if (unlikely(offset < 8)) { - const int dec64 = dec64table[offset]; + /* + * partialDecoding : + * may not respect endBlock parsing restrictions + */ + assert(op <= oend); + if (partialDecoding && + (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = min(length, (size_t)(oend - op)); + const BYTE * const matchEnd = match + mlen; + BYTE * const copyEnd = op + mlen; + + if (matchEnd > op) { + /* overlap copy */ + while (op < copyEnd) + *op++ = *match++; + } else { + memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) + break; + continue; + } + if (unlikely(offset < 8)) { op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; - match += dec32table[offset]; + match += inc32table[offset]; memcpy(op + 4, match, 4); - match -= dec64; + match -= dec64table[offset]; } else { LZ4_copy8(op, match); match += 8; @@ -291,7 +406,7 @@ static FORCE_INLINE int LZ4_decompress_generic( op += 8; - if (unlikely(cpy > oend - 12)) { + if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); if (cpy > oend - LASTLITERALS) { @@ -307,60 +422,139 @@ static FORCE_INLINE int LZ4_decompress_generic( match += oCopyLimit - op; op = oCopyLimit; } - while (op < cpy) *op++ = *match++; } else { LZ4_copy8(op, match); - if (length > 16) LZ4_wildCopy(op + 8, match + 8, cpy); } - - op = cpy; /* correction */ + op = cpy; /* wildcopy correction */ } /* end of decoding */ if (endOnInput) { /* Nb of output bytes decoded */ - return (int) (((char *)op) - dest); + return (int) (((char *)op) - dst); } else { /* Nb of input bytes read */ - return (int) (((const char *)ip) - source); + return (int) (((const char *)ip) - src); } /* Overflow error detected */ _output_error: - return -1; + return (int) (-(((const char *)ip) - src)) - 1; } int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, int maxDecompressedSize) { - return LZ4_decompress_generic(source, dest, compressedSize, - maxDecompressedSize, endOnInputSize, full, 0, - noDict, (BYTE *)dest, NULL, 0); + return LZ4_decompress_generic(source, dest, + compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, + noDict, (BYTE *)dest, NULL, 0); } -int LZ4_decompress_safe_partial(const char *source, char *dest, - int compressedSize, int targetOutputSize, int maxDecompressedSize) +int LZ4_decompress_safe_partial(const char *src, char *dst, + int compressedSize, int targetOutputSize, int dstCapacity) { - return LZ4_decompress_generic(source, dest, compressedSize, - maxDecompressedSize, endOnInputSize, partial, - targetOutputSize, noDict, (BYTE *)dest, NULL, 0); + dstCapacity = min(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE *)dst, NULL, 0); } int LZ4_decompress_fast(const char *source, char *dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, withPrefix64k, - (BYTE *)(dest - 64 * KB), NULL, 64 * KB); + endOnOutputSize, decode_full_block, + withPrefix64k, + (BYTE *)dest - 64 * KB, NULL, 0); +} + +/* ===== Instantiate a few more decoding cases, used more than once. ===== */ + +int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest, + int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + withPrefix64k, + (BYTE *)dest - 64 * KB, NULL, 0); +} + +static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest, + int compressedSize, + int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + noDict, + (BYTE *)dest - prefixSize, NULL, 0); +} + +int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); } +static int LZ4_decompress_fast_extDict(const char *source, char *dest, + int originalSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + 0, originalSize, + endOnOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); +} + +/* + * The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). + */ +static FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + size_t prefixSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); +} + +static FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char *source, char *dest, + int originalSize, size_t prefixSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + 0, originalSize, + endOnOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); +} + +/* ===== streaming decompression functions ===== */ + int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, const char *dictionary, int dictSize) { - LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; lz4sd->prefixSize = (size_t) dictSize; lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize; @@ -382,35 +576,51 @@ int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, const char *source, char *dest, int compressedSize, int maxOutputSize) { - LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE *)dest) { - result = LZ4_decompress_generic(source, dest, - compressedSize, - maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, - lz4sd->externalDict, - lz4sd->extDictSize); - + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, + compressedSize, maxOutputSize); + if (result <= 0) + return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + /* They're rolling the current segment. */ + if (lz4sd->prefixSize >= 64 * KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, + compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, + dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, + compressedSize, maxOutputSize, + lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; + lz4sd->prefixEnd += result; } else { + /* + * The buffer wraps around, or they're + * switching to another buffer. + */ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE *)dest, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE *)dest + result; + lz4sd->prefixEnd = (BYTE *)dest + result; } return result; @@ -422,75 +632,66 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE *)dest) { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, - lz4sd->prefixEnd - lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - + if (lz4sd->prefixSize == 0) { + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) + return result; + lz4sd->prefixSize = originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + if (lz4sd->prefixSize >= 64 * KB - 1 || + lz4sd->extDictSize == 0) + result = LZ4_decompress_fast(source, dest, + originalSize); + else + result = LZ4_decompress_fast_doubleDict(source, dest, + originalSize, lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += originalSize; + lz4sd->prefixEnd += originalSize; } else { lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE *)dest, - lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_fast_extDict(source, dest, + originalSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE *)dest + originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; } - return result; } -/* - * Advanced decoding functions : - * *_usingDict() : - * These decoding functions work the same as "_continue" ones, - * the dictionary must be explicitly provided within parameters - */ -static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source, - char *dest, int compressedSize, int maxOutputSize, int safe, - const char *dictStart, int dictSize) +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const char *dictStart, int dictSize) { if (dictSize == 0) - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, safe, full, 0, - noDict, (BYTE *)dest, NULL, 0); - if (dictStart + dictSize == dest) { - if (dictSize >= (int)(64 * KB - 1)) - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, safe, full, 0, - withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, - maxOutputSize, safe, full, 0, noDict, - (BYTE *)dest - dictSize, NULL, 0); + return LZ4_decompress_safe(source, dest, + compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 * KB - 1) + return LZ4_decompress_safe_withPrefix64k(source, dest, + compressedSize, maxOutputSize); + return LZ4_decompress_safe_withSmallPrefix(source, dest, + compressedSize, maxOutputSize, dictSize); } - return LZ4_decompress_generic(source, dest, compressedSize, - maxOutputSize, safe, full, 0, usingExtDict, - (BYTE *)dest, (const BYTE *)dictStart, dictSize); -} - -int LZ4_decompress_safe_usingDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - const char *dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, - compressedSize, maxOutputSize, 1, dictStart, dictSize); + return LZ4_decompress_safe_forceExtDict(source, dest, + compressedSize, maxOutputSize, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char *source, char *dest, - int originalSize, const char *dictStart, int dictSize) + int originalSize, + const char *dictStart, int dictSize) { - return LZ4_decompress_usingDict_generic(source, dest, 0, - originalSize, 0, dictStart, dictSize); + if (dictSize == 0 || dictStart + dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); + + return LZ4_decompress_fast_extDict(source, dest, originalSize, + dictStart, dictSize); } #ifndef STATIC |