summaryrefslogtreecommitdiffstats
path: root/lib/zstd/compress/fse_compress.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/zstd/compress/fse_compress.c')
-rw-r--r--lib/zstd/compress/fse_compress.c83
1 files changed, 63 insertions, 20 deletions
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
index 436985b620e5..ec5b1ca6d71a 100644
--- a/lib/zstd/compress/fse_compress.c
+++ b/lib/zstd/compress/fse_compress.c
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
+ U32 const maxSV1 = maxSymbolValue+1;
- U32* cumul = (U32*)workSpace;
- FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+ U16* cumul = (U16*)workSpace; /* size = maxSV1 */
+ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
U32 highThreshold = tableSize-1;
- if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+ assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */
tableU16[-2] = (U16) tableLog;
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
- for (u=1; u <= maxSymbolValue+1; u++) {
+ for (u=1; u <= maxSV1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
- cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+ assert(normalizedCounter[u-1] >= 0);
+ cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+ assert(cumul[u] >= cumul[u-1]); /* no overflow */
} }
- cumul[maxSymbolValue+1] = tableSize+1;
+ cumul[maxSV1] = (U16)(tableSize+1);
}
/* Spread symbols */
- { U32 position = 0;
+ if (highThreshold == tableSize - 1) {
+ /* Case for no low prob count symbols. Lay down 8 bytes at a time
+ * to reduce branch misses since we are operating on a small block
+ */
+ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+ { U64 const add = 0x0101010101010101ull;
+ size_t pos = 0;
+ U64 sv = 0;
+ U32 s;
+ for (s=0; s<maxSV1; ++s, sv += add) {
+ int i;
+ int const n = normalizedCounter[s];
+ MEM_write64(spread + pos, sv);
+ for (i = 8; i < n; i += 8) {
+ MEM_write64(spread + pos + i, sv);
+ }
+ assert(n>=0);
+ pos += (size_t)n;
+ }
+ }
+ /* Spread symbols across the table. Lack of lowprob symbols means that
+ * we don't need variable sized inner loop, so we can unroll the loop and
+ * reduce branch misses.
+ */
+ { size_t position = 0;
+ size_t s;
+ size_t const unroll = 2; /* Experimentally determined optimal unroll */
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
+ size_t u;
+ for (u = 0; u < unroll; ++u) {
+ size_t const uPosition = (position + (u * step)) & tableMask;
+ tableSymbol[uPosition] = spread[s + u];
+ }
+ position = (position + (unroll * step)) & tableMask;
+ }
+ assert(position == 0); /* Must have initialized all positions */
+ }
+ } else {
+ U32 position = 0;
U32 symbol;
- for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+ for (symbol=0; symbol<maxSV1; symbol++) {
int nbOccurrences;
int const freq = normalizedCounter[symbol];
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
} }
-
assert(position==0); /* Must have initialized all positions */
}
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
- symbolTT[s].deltaFindState = total - 1;
+ assert(total <= INT_MAX);
+ symbolTT[s].deltaFindState = (int)(total - 1);
total ++;
break;
default :
- {
- U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
- U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+ assert(normalizedCounter[s] > 1);
+ { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
+ U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
- total += normalizedCounter[s];
+ symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+ total += (unsigned)normalizedCounter[s];
} } } }
#if 0 /* debug : symbol costs */
@@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
- }
- }
+ } }
#endif
return 0;
@@ -173,16 +214,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
-
#ifndef FSE_COMMONDEFS_ONLY
-
/*-**************************************************************
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
- size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ + 4 /* bitCount initialized at 4 */
+ + 2 /* first two symbols may use one additional bit each */) / 8)
+ + 1 /* round up to whole nb bytes */
+ + 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}