author      Stefan Richter <stefanr@s5r6.in-berlin.de>    2011-05-10 20:52:07 +0200
committer   Stefan Richter <stefanr@s5r6.in-berlin.de>    2011-05-10 22:50:41 +0200
commit      020abf03cd659388f94cb328e1e1df0656e0d7ff (patch)
tree        40d05011708ad1b4a05928d167eb120420581aa6 /lib
parent      firewire: ohci: optimize find_branch_descriptor() (diff)
parent      Linux 2.6.39-rc7 (diff)
Merge tag 'v2.6.39-rc7'
in order to pull in changes in drivers/media/dvb/firewire/ and
sound/firewire/.
Diffstat (limited to 'lib')
50 files changed, 7194 insertions, 496 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index fa9bf2c06199..9c10e38fc609 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -22,6 +22,9 @@ config GENERIC_FIND_FIRST_BIT config GENERIC_FIND_NEXT_BIT bool +config GENERIC_FIND_BIT_LE + bool + config GENERIC_FIND_LAST_BIT bool default y @@ -106,6 +109,8 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +source "lib/xz/Kconfig" + # # These all provide a common interface (hence the apparent duplication with # ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) @@ -120,6 +125,10 @@ config DECOMPRESS_BZIP2 config DECOMPRESS_LZMA tristate +config DECOMPRESS_XZ + select XZ_DEC + tristate + config DECOMPRESS_LZO select LZO_DECOMPRESS tristate @@ -149,6 +158,45 @@ config REED_SOLOMON_DEC16 boolean # +# BCH support is selected if needed +# +config BCH + tristate + +config BCH_CONST_PARAMS + boolean + help + Drivers may select this option to force specific constant + values for parameters 'm' (Galois field order) and 't' + (error correction capability). Those specific values must + be set by declaring default values for symbols BCH_CONST_M + and BCH_CONST_T. + Doing so will enable extra compiler optimizations, + improving encoding and decoding performance up to 2x for + usual (m,t) values (typically such that m*t < 200). + When this option is selected, the BCH library supports + only a single (m,t) configuration. This is mainly useful + for NAND flash board drivers requiring known, fixed BCH + parameters. + +config BCH_CONST_M + int + range 5 15 + help + Constant value for Galois field order 'm'. If 'k' is the + number of data bits to protect, 'm' should be chosen such + that (k + m*t) <= 2**m - 1. + Drivers should declare a default value for this symbol if + they select option BCH_CONST_PARAMS. + +config BCH_CONST_T + int + help + Constant value for error correction capability in bits 't'. + Drivers should declare a default value for this symbol if + they select option BCH_CONST_PARAMS. + +# # Textsearch support is select'ed if needed # config TEXTSEARCH @@ -195,6 +243,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS depends on EXPERIMENTAL && BROKEN +config CPU_RMAP + bool + depends on SMP + # # Netlink attribute parsing support is select'ed if needed # @@ -210,4 +262,14 @@ config GENERIC_ATOMIC64 config LRU_CACHE tristate +config AVERAGE + bool "Averaging functions" + help + This option is provided for the case where no in-kernel-tree + modules require averaging functions, but a module built outside + the kernel tree does. Such modules that use library averaging + functions require Y here. + + If unsure, say N. + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28b42b9274d0..c768bcdda1b7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -9,6 +9,17 @@ config PRINTK_TIME operations. This is useful for identifying long delays in kernel startup. +config DEFAULT_MESSAGE_LOGLEVEL + int "Default message log level (1-7)" + range 1 7 + default "4" + help + Default log level for printk statements with no specified priority. + + This was hard-coded to KERN_WARNING since at least 2.6.10 but folks + that are auditing their logs closely may want to set it to a lower + priority. + config ENABLE_WARN_DEPRECATED bool "Enable __deprecated logic" default y @@ -102,11 +113,6 @@ config HEADERS_CHECK config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" - depends on UNDEFINED || (BLACKFIN) - default y - # This option is on purpose disabled for now. 
- # It will be enabled when we are down to a reasonable number - # of section mismatch warnings (< 10 for an allyesconfig build) help The section mismatch analysis checks if there are illegal references from one section to another section. @@ -173,7 +179,25 @@ config LOCKUP_DETECTOR An NMI is generated every 60 seconds or so to check for hardlockups. config HARDLOCKUP_DETECTOR - def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ + !ARCH_HAS_NMI_WATCHDOG + +config BOOTPARAM_HARDLOCKUP_PANIC + bool "Panic (Reboot) On Hard Lockups" + depends on LOCKUP_DETECTOR + help + Say Y here to enable the kernel to panic on "hard lockups", + which are bugs that cause the kernel to loop in kernel + mode with interrupts disabled for more than 60 seconds. + + Say N if unsure. + +config BOOTPARAM_HARDLOCKUP_PANIC_VALUE + int + depends on LOCKUP_DETECTOR + range 0 1 + default 0 if !BOOTPARAM_HARDLOCKUP_PANIC + default 1 if BOOTPARAM_HARDLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" @@ -410,11 +434,9 @@ config DEBUG_KMEMLEAK_EARLY_LOG_SIZE config DEBUG_KMEMLEAK_TEST tristate "Simple test for the kernel memory leak detector" - depends on DEBUG_KMEMLEAK + depends on DEBUG_KMEMLEAK && m help - Say Y or M here to build a test for the kernel memory leak - detector. This option enables a module that explicitly leaks - memory. + This option enables a module that explicitly leaks memory. If unsure, say N. @@ -469,15 +491,6 @@ config DEBUG_MUTEXES This feature allows mutex semantics violations to be detected and reported. -config BKL - bool "Big Kernel Lock" if (SMP || PREEMPT) - default y - help - This is the traditional lock that is used in old code instead - of proper locking. All drivers that use the BKL should depend - on this symbol. - Say Y here unless you are working on removing the BKL. - config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -656,7 +669,7 @@ config DEBUG_HIGHMEM Disable for production systems. config DEBUG_BUGVERBOSE - bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED + bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 @@ -728,8 +741,8 @@ config DEBUG_WRITECOUNT If unsure, say N. config DEBUG_MEMORY_INIT - bool "Debug memory initialisation" if EMBEDDED - default !EMBEDDED + bool "Debug memory initialisation" if EXPERT + default !EXPERT help Enable this for additional checks during memory initialisation. 
The sanity checks verify aspects of the VM such as the memory model @@ -804,7 +817,7 @@ config ARCH_WANT_FRAME_POINTERS config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ - (CRIS || M68K || M68KNOMMU || FRV || UML || \ + (CRIS || M68K || FRV || UML || \ AVR32 || SUPERH || BLACKFIN || MN10300) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS @@ -1235,3 +1248,6 @@ source "samples/Kconfig" source "lib/Kconfig.kgdb" source "lib/Kconfig.kmemcheck" + +config TEST_KSTRTOX + tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index e6a3763b8212..ef0f28571156 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,11 +8,11 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o dump_stack.o \ + rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o flex_array.o + is_single_threaded.o plist.o decompress.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -21,7 +21,9 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o +obj-y += kstrtox.o +obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -38,12 +40,12 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o +lib-$(CONFIG_GENERIC_FIND_BIT_LE) += find_next_bit.o obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o -obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o @@ -67,13 +69,16 @@ obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ +obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o +lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o @@ -106,6 +111,10 @@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o +obj-$(CONFIG_AVERAGE) += average.o + +obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/average.c b/lib/average.c new file mode 100644 index 000000000000..5576c2841496 --- /dev/null +++ b/lib/average.c @@ -0,0 +1,61 @@ +/* + * lib/average.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include <linux/module.h> +#include <linux/average.h> +#include <linux/bug.h> +#include <linux/log2.h> + +/** + * DOC: Exponentially Weighted Moving Average (EWMA) + * + * These are generic functions for calculating Exponentially Weighted Moving + * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled + * up internal representation of the average value to prevent rounding errors. + * The factor for scaling up and the exponential weight (or decay rate) have to + * be specified thru the init fuction. The structure should not be accessed + * directly but only thru the helper functions. + */ + +/** + * ewma_init() - Initialize EWMA parameters + * @avg: Average structure + * @factor: Factor to use for the scaled up internal value. The maximum value + * of averages can be ULONG_MAX/(factor*weight). For performance reasons + * factor has to be a power of 2. + * @weight: Exponential weight, or decay rate. This defines how fast the + * influence of older values decreases. For performance reasons weight has + * to be a power of 2. + * + * Initialize the EWMA parameters for a given struct ewma @avg. + */ +void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight) +{ + WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor)); + + avg->weight = ilog2(weight); + avg->factor = ilog2(factor); + avg->internal = 0; +} +EXPORT_SYMBOL(ewma_init); + +/** + * ewma_add() - Exponentially weighted moving average (EWMA) + * @avg: Average structure + * @val: Current value + * + * Add a sample to the average. + */ +struct ewma *ewma_add(struct ewma *avg, unsigned long val) +{ + avg->internal = avg->internal ? + (((avg->internal << avg->weight) - avg->internal) + + (val << avg->factor)) >> avg->weight : + (val << avg->factor); + return avg; +} +EXPORT_SYMBOL(ewma_add); diff --git a/lib/bch.c b/lib/bch.c new file mode 100644 index 000000000000..bc89dfe4d1b3 --- /dev/null +++ b/lib/bch.c @@ -0,0 +1,1368 @@ +/* + * Generic binary BCH encoding/decoding library + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Copyright © 2011 Parrot S.A. + * + * Author: Ivan Djelic <ivan.djelic@parrot.com> + * + * Description: + * + * This library provides runtime configurable encoding/decoding of binary + * Bose-Chaudhuri-Hocquenghem (BCH) codes. + * + * Call init_bch to get a pointer to a newly allocated bch_control structure for + * the given m (Galois field order), t (error correction capability) and + * (optional) primitive polynomial parameters. + * + * Call encode_bch to compute and store ecc parity bytes to a given buffer. + * Call decode_bch to detect and locate errors in received data. + * + * On systems supporting hw BCH features, intermediate results may be provided + * to decode_bch in order to skip certain steps. See decode_bch() documentation + * for details. 
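
As a brief illustration of the EWMA helpers added in lib/average.c above, here is a minimal, hypothetical driver snippet (not part of this merge); the signal-strength use case and all names in it are assumptions.

```c
#include <linux/average.h>

/* Hypothetical smoothing of a noisy per-packet RSSI reading. */
static struct ewma rssi_avg;

static void rssi_tracking_init(void)
{
	/* scale internal values by 1024, decay with weight 8 (both powers of 2) */
	ewma_init(&rssi_avg, 1024, 8);
}

static void rssi_sample(unsigned long rssi)
{
	/* fold the new sample into the running average */
	ewma_add(&rssi_avg, rssi);
}
```
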
+ * + * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of + * parameters m and t; thus allowing extra compiler optimizations and providing + * better (up to 2x) encoding performance. Using this option makes sense when + * (m,t) are fixed and known in advance, e.g. when using BCH error correction + * on a particular NAND flash device. + * + * Algorithmic details: + * + * Encoding is performed by processing 32 input bits in parallel, using 4 + * remainder lookup tables. + * + * The final stage of decoding involves the following internal steps: + * a. Syndrome computation + * b. Error locator polynomial computation using Berlekamp-Massey algorithm + * c. Error locator root finding (by far the most expensive step) + * + * In this implementation, step c is not performed using the usual Chien search. + * Instead, an alternative approach described in [1] is used. It consists in + * factoring the error locator polynomial using the Berlekamp Trace algorithm + * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial + * solving techniques [2] are used. The resulting algorithm, called BTZ, yields + * much better performance than Chien search for usual (m,t) values (typically + * m >= 13, t < 32, see [1]). + * + * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields + * of characteristic 2, in: Western European Workshop on Research in Cryptology + * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear. + * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over + * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/bitops.h> +#include <asm/byteorder.h> +#include <linux/bch.h> + +#if defined(CONFIG_BCH_CONST_PARAMS) +#define GF_M(_p) (CONFIG_BCH_CONST_M) +#define GF_T(_p) (CONFIG_BCH_CONST_T) +#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) +#else +#define GF_M(_p) ((_p)->m) +#define GF_T(_p) ((_p)->t) +#define GF_N(_p) ((_p)->n) +#endif + +#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) +#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) + +#ifndef dbg +#define dbg(_fmt, args...) 
do {} while (0) +#endif + +/* + * represent a polynomial over GF(2^m) + */ +struct gf_poly { + unsigned int deg; /* polynomial degree */ + unsigned int c[0]; /* polynomial terms */ +}; + +/* given its degree, compute a polynomial size in bytes */ +#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int)) + +/* polynomial of degree 1 */ +struct gf_poly_deg1 { + struct gf_poly poly; + unsigned int c[2]; +}; + +/* + * same as encode_bch(), but process input data one byte at a time + */ +static void encode_bch_unaligned(struct bch_control *bch, + const unsigned char *data, unsigned int len, + uint32_t *ecc) +{ + int i; + const uint32_t *p; + const int l = BCH_ECC_WORDS(bch)-1; + + while (len--) { + p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff); + + for (i = 0; i < l; i++) + ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++); + + ecc[l] = (ecc[l] << 8)^(*p); + } +} + +/* + * convert ecc bytes to aligned, zero-padded 32-bit ecc words + */ +static void load_ecc8(struct bch_control *bch, uint32_t *dst, + const uint8_t *src) +{ + uint8_t pad[4] = {0, 0, 0, 0}; + unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; + + for (i = 0; i < nwords; i++, src += 4) + dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3]; + + memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords); + dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3]; +} + +/* + * convert 32-bit ecc words to ecc bytes + */ +static void store_ecc8(struct bch_control *bch, uint8_t *dst, + const uint32_t *src) +{ + uint8_t pad[4]; + unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; + + for (i = 0; i < nwords; i++) { + *dst++ = (src[i] >> 24); + *dst++ = (src[i] >> 16) & 0xff; + *dst++ = (src[i] >> 8) & 0xff; + *dst++ = (src[i] >> 0) & 0xff; + } + pad[0] = (src[nwords] >> 24); + pad[1] = (src[nwords] >> 16) & 0xff; + pad[2] = (src[nwords] >> 8) & 0xff; + pad[3] = (src[nwords] >> 0) & 0xff; + memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords); +} + +/** + * encode_bch - calculate BCH ecc parity of data + * @bch: BCH control structure + * @data: data to encode + * @len: data length in bytes + * @ecc: ecc parity data, must be initialized by caller + * + * The @ecc parity array is used both as input and output parameter, in order to + * allow incremental computations. It should be of the size indicated by member + * @ecc_bytes of @bch, and should be initialized to 0 before the first call. + * + * The exact number of computed ecc parity bits is given by member @ecc_bits of + * @bch; it may be less than m*t for large values of t. + */ +void encode_bch(struct bch_control *bch, const uint8_t *data, + unsigned int len, uint8_t *ecc) +{ + const unsigned int l = BCH_ECC_WORDS(bch)-1; + unsigned int i, mlen; + unsigned long m; + uint32_t w, r[l+1]; + const uint32_t * const tab0 = bch->mod8_tab; + const uint32_t * const tab1 = tab0 + 256*(l+1); + const uint32_t * const tab2 = tab1 + 256*(l+1); + const uint32_t * const tab3 = tab2 + 256*(l+1); + const uint32_t *pdata, *p0, *p1, *p2, *p3; + + if (ecc) { + /* load ecc parity bytes into internal 32-bit buffer */ + load_ecc8(bch, bch->ecc_buf, ecc); + } else { + memset(bch->ecc_buf, 0, sizeof(r)); + } + + /* process first unaligned data bytes */ + m = ((unsigned long)data) & 3; + if (m) { + mlen = (len < (4-m)) ? 
len : 4-m; + encode_bch_unaligned(bch, data, mlen, bch->ecc_buf); + data += mlen; + len -= mlen; + } + + /* process 32-bit aligned data words */ + pdata = (uint32_t *)data; + mlen = len/4; + data += 4*mlen; + len -= 4*mlen; + memcpy(r, bch->ecc_buf, sizeof(r)); + + /* + * split each 32-bit word into 4 polynomials of weight 8 as follows: + * + * 31 ...24 23 ...16 15 ... 8 7 ... 0 + * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt + * tttttttt mod g = r0 (precomputed) + * zzzzzzzz 00000000 mod g = r1 (precomputed) + * yyyyyyyy 00000000 00000000 mod g = r2 (precomputed) + * xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed) + * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3 + */ + while (mlen--) { + /* input data is read in big-endian format */ + w = r[0]^cpu_to_be32(*pdata++); + p0 = tab0 + (l+1)*((w >> 0) & 0xff); + p1 = tab1 + (l+1)*((w >> 8) & 0xff); + p2 = tab2 + (l+1)*((w >> 16) & 0xff); + p3 = tab3 + (l+1)*((w >> 24) & 0xff); + + for (i = 0; i < l; i++) + r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i]; + + r[l] = p0[l]^p1[l]^p2[l]^p3[l]; + } + memcpy(bch->ecc_buf, r, sizeof(r)); + + /* process last unaligned bytes */ + if (len) + encode_bch_unaligned(bch, data, len, bch->ecc_buf); + + /* store ecc parity bytes into original parity buffer */ + if (ecc) + store_ecc8(bch, ecc, bch->ecc_buf); +} +EXPORT_SYMBOL_GPL(encode_bch); + +static inline int modulo(struct bch_control *bch, unsigned int v) +{ + const unsigned int n = GF_N(bch); + while (v >= n) { + v -= n; + v = (v & n) + (v >> GF_M(bch)); + } + return v; +} + +/* + * shorter and faster modulo function, only works when v < 2N. + */ +static inline int mod_s(struct bch_control *bch, unsigned int v) +{ + const unsigned int n = GF_N(bch); + return (v < n) ? v : v-n; +} + +static inline int deg(unsigned int poly) +{ + /* polynomial degree is the most-significant bit index */ + return fls(poly)-1; +} + +static inline int parity(unsigned int x) +{ + /* + * public domain code snippet, lifted from + * http://www-graphics.stanford.edu/~seander/bithacks.html + */ + x ^= x >> 1; + x ^= x >> 2; + x = (x & 0x11111111U) * 0x11111111U; + return (x >> 28) & 1; +} + +/* Galois field basic operations: multiply, divide, inverse, etc. */ + +static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a, + unsigned int b) +{ + return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+ + bch->a_log_tab[b])] : 0; +} + +static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a) +{ + return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0; +} + +static inline unsigned int gf_div(struct bch_control *bch, unsigned int a, + unsigned int b) +{ + return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+ + GF_N(bch)-bch->a_log_tab[b])] : 0; +} + +static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a) +{ + return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]]; +} + +static inline unsigned int a_pow(struct bch_control *bch, int i) +{ + return bch->a_pow_tab[modulo(bch, i)]; +} + +static inline int a_log(struct bch_control *bch, unsigned int x) +{ + return bch->a_log_tab[x]; +} + +static inline int a_ilog(struct bch_control *bch, unsigned int x) +{ + return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]); +} + +/* + * compute 2t syndromes of ecc polynomial, i.e. 
ecc(a^j) for j=1..2t + */ +static void compute_syndromes(struct bch_control *bch, uint32_t *ecc, + unsigned int *syn) +{ + int i, j, s; + unsigned int m; + uint32_t poly; + const int t = GF_T(bch); + + s = bch->ecc_bits; + + /* make sure extra bits in last ecc word are cleared */ + m = ((unsigned int)s) & 31; + if (m) + ecc[s/32] &= ~((1u << (32-m))-1); + memset(syn, 0, 2*t*sizeof(*syn)); + + /* compute v(a^j) for j=1 .. 2t-1 */ + do { + poly = *ecc++; + s -= 32; + while (poly) { + i = deg(poly); + for (j = 0; j < 2*t; j += 2) + syn[j] ^= a_pow(bch, (j+1)*(i+s)); + + poly ^= (1 << i); + } + } while (s > 0); + + /* v(a^(2j)) = v(a^j)^2 */ + for (j = 0; j < t; j++) + syn[2*j+1] = gf_sqr(bch, syn[j]); +} + +static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src) +{ + memcpy(dst, src, GF_POLY_SZ(src->deg)); +} + +static int compute_error_locator_polynomial(struct bch_control *bch, + const unsigned int *syn) +{ + const unsigned int t = GF_T(bch); + const unsigned int n = GF_N(bch); + unsigned int i, j, tmp, l, pd = 1, d = syn[0]; + struct gf_poly *elp = bch->elp; + struct gf_poly *pelp = bch->poly_2t[0]; + struct gf_poly *elp_copy = bch->poly_2t[1]; + int k, pp = -1; + + memset(pelp, 0, GF_POLY_SZ(2*t)); + memset(elp, 0, GF_POLY_SZ(2*t)); + + pelp->deg = 0; + pelp->c[0] = 1; + elp->deg = 0; + elp->c[0] = 1; + + /* use simplified binary Berlekamp-Massey algorithm */ + for (i = 0; (i < t) && (elp->deg <= t); i++) { + if (d) { + k = 2*i-pp; + gf_poly_copy(elp_copy, elp); + /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */ + tmp = a_log(bch, d)+n-a_log(bch, pd); + for (j = 0; j <= pelp->deg; j++) { + if (pelp->c[j]) { + l = a_log(bch, pelp->c[j]); + elp->c[j+k] ^= a_pow(bch, tmp+l); + } + } + /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */ + tmp = pelp->deg+k; + if (tmp > elp->deg) { + elp->deg = tmp; + gf_poly_copy(pelp, elp_copy); + pd = d; + pp = 2*i; + } + } + /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */ + if (i < t-1) { + d = syn[2*i+2]; + for (j = 1; j <= elp->deg; j++) + d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]); + } + } + dbg("elp=%s\n", gf_poly_str(elp)); + return (elp->deg > t) ? -1 : (int)elp->deg; +} + +/* + * solve a m x m linear system in GF(2) with an expected number of solutions, + * and return the number of found solutions + */ +static int solve_linear_system(struct bch_control *bch, unsigned int *rows, + unsigned int *sol, int nsol) +{ + const int m = GF_M(bch); + unsigned int tmp, mask; + int rem, c, r, p, k, param[m]; + + k = 0; + mask = 1 << m; + + /* Gaussian elimination */ + for (c = 0; c < m; c++) { + rem = 0; + p = c-k; + /* find suitable row for elimination */ + for (r = p; r < m; r++) { + if (rows[r] & mask) { + if (r != p) { + tmp = rows[r]; + rows[r] = rows[p]; + rows[p] = tmp; + } + rem = r+1; + break; + } + } + if (rem) { + /* perform elimination on remaining rows */ + tmp = rows[p]; + for (r = rem; r < m; r++) { + if (rows[r] & mask) + rows[r] ^= tmp; + } + } else { + /* elimination not needed, store defective row index */ + param[k++] = c; + } + mask >>= 1; + } + /* rewrite system, inserting fake parameter rows */ + if (k > 0) { + p = k; + for (r = m-1; r >= 0; r--) { + if ((r > m-1-k) && rows[r]) + /* system has no solution */ + return 0; + + rows[r] = (p && (r == param[p-1])) ? 
+ p--, 1u << (m-r) : rows[r-p]; + } + } + + if (nsol != (1 << k)) + /* unexpected number of solutions */ + return 0; + + for (p = 0; p < nsol; p++) { + /* set parameters for p-th solution */ + for (c = 0; c < k; c++) + rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1); + + /* compute unique solution */ + tmp = 0; + for (r = m-1; r >= 0; r--) { + mask = rows[r] & (tmp|1); + tmp |= parity(mask) << (m-r); + } + sol[p] = tmp >> 1; + } + return nsol; +} + +/* + * this function builds and solves a linear system for finding roots of a degree + * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m). + */ +static int find_affine4_roots(struct bch_control *bch, unsigned int a, + unsigned int b, unsigned int c, + unsigned int *roots) +{ + int i, j, k; + const int m = GF_M(bch); + unsigned int mask = 0xff, t, rows[16] = {0,}; + + j = a_log(bch, b); + k = a_log(bch, a); + rows[0] = c; + + /* buid linear system to solve X^4+aX^2+bX+c = 0 */ + for (i = 0; i < m; i++) { + rows[i+1] = bch->a_pow_tab[4*i]^ + (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^ + (b ? bch->a_pow_tab[mod_s(bch, j)] : 0); + j++; + k += 2; + } + /* + * transpose 16x16 matrix before passing it to linear solver + * warning: this code assumes m < 16 + */ + for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) { + for (k = 0; k < 16; k = (k+j+1) & ~j) { + t = ((rows[k] >> j)^rows[k+j]) & mask; + rows[k] ^= (t << j); + rows[k+j] ^= t; + } + } + return solve_linear_system(bch, rows, roots, 4); +} + +/* + * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r)) + */ +static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int n = 0; + + if (poly->c[0]) + /* poly[X] = bX+c with c!=0, root=c/b */ + roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+ + bch->a_log_tab[poly->c[1]]); + return n; +} + +/* + * compute roots of a degree 2 polynomial over GF(2^m) + */ +static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int n = 0, i, l0, l1, l2; + unsigned int u, v, r; + + if (poly->c[0] && poly->c[1]) { + + l0 = bch->a_log_tab[poly->c[0]]; + l1 = bch->a_log_tab[poly->c[1]]; + l2 = bch->a_log_tab[poly->c[2]]; + + /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */ + u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1)); + /* + * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi): + * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) = + * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u) + * i.e. 
r and r+1 are roots iff Tr(u)=0 + */ + r = 0; + v = u; + while (v) { + i = deg(v); + r ^= bch->xi_tab[i]; + v ^= (1 << i); + } + /* verify root */ + if ((gf_sqr(bch, r)^r) == u) { + /* reverse z=a/bX transformation and compute log(1/r) */ + roots[n++] = modulo(bch, 2*GF_N(bch)-l1- + bch->a_log_tab[r]+l2); + roots[n++] = modulo(bch, 2*GF_N(bch)-l1- + bch->a_log_tab[r^1]+l2); + } + } + return n; +} + +/* + * compute roots of a degree 3 polynomial over GF(2^m) + */ +static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int i, n = 0; + unsigned int a, b, c, a2, b2, c2, e3, tmp[4]; + + if (poly->c[0]) { + /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */ + e3 = poly->c[3]; + c2 = gf_div(bch, poly->c[0], e3); + b2 = gf_div(bch, poly->c[1], e3); + a2 = gf_div(bch, poly->c[2], e3); + + /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */ + c = gf_mul(bch, a2, c2); /* c = a2c2 */ + b = gf_mul(bch, a2, b2)^c2; /* b = a2b2 + c2 */ + a = gf_sqr(bch, a2)^b2; /* a = a2^2 + b2 */ + + /* find the 4 roots of this affine polynomial */ + if (find_affine4_roots(bch, a, b, c, tmp) == 4) { + /* remove a2 from final list of roots */ + for (i = 0; i < 4; i++) { + if (tmp[i] != a2) + roots[n++] = a_ilog(bch, tmp[i]); + } + } + } + return n; +} + +/* + * compute roots of a degree 4 polynomial over GF(2^m) + */ +static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int i, l, n = 0; + unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4; + + if (poly->c[0] == 0) + return 0; + + /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */ + e4 = poly->c[4]; + d = gf_div(bch, poly->c[0], e4); + c = gf_div(bch, poly->c[1], e4); + b = gf_div(bch, poly->c[2], e4); + a = gf_div(bch, poly->c[3], e4); + + /* use Y=1/X transformation to get an affine polynomial */ + if (a) { + /* first, eliminate cX by using z=X+e with ae^2+c=0 */ + if (c) { + /* compute e such that e^2 = c/a */ + f = gf_div(bch, c, a); + l = a_log(bch, f); + l += (l & 1) ? GF_N(bch) : 0; + e = a_pow(bch, l/2); + /* + * use transformation z=X+e: + * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d + * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d + * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d + * z^4 + az^3 + b'z^2 + d' + */ + d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d; + b = gf_mul(bch, a, e)^b; + } + /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */ + if (d == 0) + /* assume all roots have multiplicity 1 */ + return 0; + + c2 = gf_inv(bch, d); + b2 = gf_div(bch, a, d); + a2 = gf_div(bch, b, d); + } else { + /* polynomial is already affine */ + c2 = d; + b2 = c; + a2 = b; + } + /* find the 4 roots of this affine polynomial */ + if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) { + for (i = 0; i < 4; i++) { + /* post-process roots (reverse transformations) */ + f = a ? gf_inv(bch, roots[i]) : roots[i]; + roots[i] = a_ilog(bch, f^e); + } + n = 4; + } + return n; +} + +/* + * build monic, log-based representation of a polynomial + */ +static void gf_poly_logrep(struct bch_control *bch, + const struct gf_poly *a, int *rep) +{ + int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]); + + /* represent 0 values with -1; warning, rep[d] is not set to 1 */ + for (i = 0; i < d; i++) + rep[i] = a->c[i] ? 
mod_s(bch, a_log(bch, a->c[i])+l) : -1; +} + +/* + * compute polynomial Euclidean division remainder in GF(2^m)[X] + */ +static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a, + const struct gf_poly *b, int *rep) +{ + int la, p, m; + unsigned int i, j, *c = a->c; + const unsigned int d = b->deg; + + if (a->deg < d) + return; + + /* reuse or compute log representation of denominator */ + if (!rep) { + rep = bch->cache; + gf_poly_logrep(bch, b, rep); + } + + for (j = a->deg; j >= d; j--) { + if (c[j]) { + la = a_log(bch, c[j]); + p = j-d; + for (i = 0; i < d; i++, p++) { + m = rep[i]; + if (m >= 0) + c[p] ^= bch->a_pow_tab[mod_s(bch, + m+la)]; + } + } + } + a->deg = d-1; + while (!c[a->deg] && a->deg) + a->deg--; +} + +/* + * compute polynomial Euclidean division quotient in GF(2^m)[X] + */ +static void gf_poly_div(struct bch_control *bch, struct gf_poly *a, + const struct gf_poly *b, struct gf_poly *q) +{ + if (a->deg >= b->deg) { + q->deg = a->deg-b->deg; + /* compute a mod b (modifies a) */ + gf_poly_mod(bch, a, b, NULL); + /* quotient is stored in upper part of polynomial a */ + memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int)); + } else { + q->deg = 0; + q->c[0] = 0; + } +} + +/* + * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X] + */ +static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a, + struct gf_poly *b) +{ + struct gf_poly *tmp; + + dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b)); + + if (a->deg < b->deg) { + tmp = b; + b = a; + a = tmp; + } + + while (b->deg > 0) { + gf_poly_mod(bch, a, b, NULL); + tmp = b; + b = a; + a = tmp; + } + + dbg("%s\n", gf_poly_str(a)); + + return a; +} + +/* + * Given a polynomial f and an integer k, compute Tr(a^kX) mod f + * This is used in Berlekamp Trace algorithm for splitting polynomials + */ +static void compute_trace_bk_mod(struct bch_control *bch, int k, + const struct gf_poly *f, struct gf_poly *z, + struct gf_poly *out) +{ + const int m = GF_M(bch); + int i, j; + + /* z contains z^2j mod f */ + z->deg = 1; + z->c[0] = 0; + z->c[1] = bch->a_pow_tab[k]; + + out->deg = 0; + memset(out, 0, GF_POLY_SZ(f->deg)); + + /* compute f log representation only once */ + gf_poly_logrep(bch, f, bch->cache); + + for (i = 0; i < m; i++) { + /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */ + for (j = z->deg; j >= 0; j--) { + out->c[j] ^= z->c[j]; + z->c[2*j] = gf_sqr(bch, z->c[j]); + z->c[2*j+1] = 0; + } + if (z->deg > out->deg) + out->deg = z->deg; + + if (i < m-1) { + z->deg *= 2; + /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */ + gf_poly_mod(bch, z, f, bch->cache); + } + } + while (!out->c[out->deg] && out->deg) + out->deg--; + + dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out)); +} + +/* + * factor a polynomial using Berlekamp Trace algorithm (BTA) + */ +static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f, + struct gf_poly **g, struct gf_poly **h) +{ + struct gf_poly *f2 = bch->poly_2t[0]; + struct gf_poly *q = bch->poly_2t[1]; + struct gf_poly *tk = bch->poly_2t[2]; + struct gf_poly *z = bch->poly_2t[3]; + struct gf_poly *gcd; + + dbg("factoring %s...\n", gf_poly_str(f)); + + *g = f; + *h = NULL; + + /* tk = Tr(a^k.X) mod f */ + compute_trace_bk_mod(bch, k, f, z, tk); + + if (tk->deg > 0) { + /* compute g = gcd(f, tk) (destructive operation) */ + gf_poly_copy(f2, f); + gcd = gf_poly_gcd(bch, f2, tk); + if (gcd->deg < f->deg) { + /* compute h=f/gcd(f,tk); this will modify f and q */ + gf_poly_div(bch, f, gcd, q); + /* store g and h in-place 
(clobbering f) */ + *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly; + gf_poly_copy(*g, gcd); + gf_poly_copy(*h, q); + } + } +} + +/* + * find roots of a polynomial, using BTZ algorithm; see the beginning of this + * file for details + */ +static int find_poly_roots(struct bch_control *bch, unsigned int k, + struct gf_poly *poly, unsigned int *roots) +{ + int cnt; + struct gf_poly *f1, *f2; + + switch (poly->deg) { + /* handle low degree polynomials with ad hoc techniques */ + case 1: + cnt = find_poly_deg1_roots(bch, poly, roots); + break; + case 2: + cnt = find_poly_deg2_roots(bch, poly, roots); + break; + case 3: + cnt = find_poly_deg3_roots(bch, poly, roots); + break; + case 4: + cnt = find_poly_deg4_roots(bch, poly, roots); + break; + default: + /* factor polynomial using Berlekamp Trace Algorithm (BTA) */ + cnt = 0; + if (poly->deg && (k <= GF_M(bch))) { + factor_polynomial(bch, k, poly, &f1, &f2); + if (f1) + cnt += find_poly_roots(bch, k+1, f1, roots); + if (f2) + cnt += find_poly_roots(bch, k+1, f2, roots+cnt); + } + break; + } + return cnt; +} + +#if defined(USE_CHIEN_SEARCH) +/* + * exhaustive root search (Chien) implementation - not used, included only for + * reference/comparison tests + */ +static int chien_search(struct bch_control *bch, unsigned int len, + struct gf_poly *p, unsigned int *roots) +{ + int m; + unsigned int i, j, syn, syn0, count = 0; + const unsigned int k = 8*len+bch->ecc_bits; + + /* use a log-based representation of polynomial */ + gf_poly_logrep(bch, p, bch->cache); + bch->cache[p->deg] = 0; + syn0 = gf_div(bch, p->c[0], p->c[p->deg]); + + for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) { + /* compute elp(a^i) */ + for (j = 1, syn = syn0; j <= p->deg; j++) { + m = bch->cache[j]; + if (m >= 0) + syn ^= a_pow(bch, m+j*i); + } + if (syn == 0) { + roots[count++] = GF_N(bch)-i; + if (count == p->deg) + break; + } + } + return (count == p->deg) ? 
count : 0; +} +#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc) +#endif /* USE_CHIEN_SEARCH */ + +/** + * decode_bch - decode received codeword and find bit error locations + * @bch: BCH control structure + * @data: received data, ignored if @calc_ecc is provided + * @len: data length in bytes, must always be provided + * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc + * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data + * @syn: hw computed syndrome data (if NULL, syndrome is calculated) + * @errloc: output array of error locations + * + * Returns: + * The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if + * invalid parameters were provided + * + * Depending on the available hw BCH support and the need to compute @calc_ecc + * separately (using encode_bch()), this function should be called with one of + * the following parameter configurations - + * + * by providing @data and @recv_ecc only: + * decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc) + * + * by providing @recv_ecc and @calc_ecc: + * decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc) + * + * by providing ecc = recv_ecc XOR calc_ecc: + * decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc) + * + * by providing syndrome results @syn: + * decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc) + * + * Once decode_bch() has successfully returned with a positive value, error + * locations returned in array @errloc should be interpreted as follows - + * + * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for + * data correction) + * + * if (errloc[n] < 8*len), then n-th error is located in data and can be + * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8); + * + * Note that this function does not perform any data correction by itself, it + * merely indicates error locations. + */ +int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len, + const uint8_t *recv_ecc, const uint8_t *calc_ecc, + const unsigned int *syn, unsigned int *errloc) +{ + const unsigned int ecc_words = BCH_ECC_WORDS(bch); + unsigned int nbits; + int i, err, nroots; + uint32_t sum; + + /* sanity check: make sure data length can be handled */ + if (8*len > (bch->n-bch->ecc_bits)) + return -EINVAL; + + /* if caller does not provide syndromes, compute them */ + if (!syn) { + if (!calc_ecc) { + /* compute received data ecc into an internal buffer */ + if (!data || !recv_ecc) + return -EINVAL; + encode_bch(bch, data, len, NULL); + } else { + /* load provided calculated ecc */ + load_ecc8(bch, bch->ecc_buf, calc_ecc); + } + /* load received ecc or assume it was XORed in calc_ecc */ + if (recv_ecc) { + load_ecc8(bch, bch->ecc_buf2, recv_ecc); + /* XOR received and calculated ecc */ + for (i = 0, sum = 0; i < (int)ecc_words; i++) { + bch->ecc_buf[i] ^= bch->ecc_buf2[i]; + sum |= bch->ecc_buf[i]; + } + if (!sum) + /* no error found */ + return 0; + } + compute_syndromes(bch, bch->ecc_buf, bch->syn); + syn = bch->syn; + } + + err = compute_error_locator_polynomial(bch, syn); + if (err > 0) { + nroots = find_poly_roots(bch, 1, bch->elp, errloc); + if (err != nroots) + err = -1; + } + if (err > 0) { + /* post-process raw error locations for easier correction */ + nbits = (len*8)+bch->ecc_bits; + for (i = 0; i < err; i++) { + if (errloc[i] >= nbits) { + err = -1; + break; + } + errloc[i] = nbits-1-errloc[i]; + errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7)); + } + } + return (err >= 0) ? 
err : -EBADMSG; +} +EXPORT_SYMBOL_GPL(decode_bch); + +/* + * generate Galois field lookup tables + */ +static int build_gf_tables(struct bch_control *bch, unsigned int poly) +{ + unsigned int i, x = 1; + const unsigned int k = 1 << deg(poly); + + /* primitive polynomial must be of degree m */ + if (k != (1u << GF_M(bch))) + return -1; + + for (i = 0; i < GF_N(bch); i++) { + bch->a_pow_tab[i] = x; + bch->a_log_tab[x] = i; + if (i && (x == 1)) + /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */ + return -1; + x <<= 1; + if (x & k) + x ^= poly; + } + bch->a_pow_tab[GF_N(bch)] = 1; + bch->a_log_tab[0] = 0; + + return 0; +} + +/* + * compute generator polynomial remainder tables for fast encoding + */ +static void build_mod8_tables(struct bch_control *bch, const uint32_t *g) +{ + int i, j, b, d; + uint32_t data, hi, lo, *tab; + const int l = BCH_ECC_WORDS(bch); + const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32); + const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32); + + memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab)); + + for (i = 0; i < 256; i++) { + /* p(X)=i is a small polynomial of weight <= 8 */ + for (b = 0; b < 4; b++) { + /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */ + tab = bch->mod8_tab + (b*256+i)*l; + data = i << (8*b); + while (data) { + d = deg(data); + /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */ + data ^= g[0] >> (31-d); + for (j = 0; j < ecclen; j++) { + hi = (d < 31) ? g[j] << (d+1) : 0; + lo = (j+1 < plen) ? + g[j+1] >> (31-d) : 0; + tab[j] ^= hi|lo; + } + } + } + } +} + +/* + * build a base for factoring degree 2 polynomials + */ +static int build_deg2_base(struct bch_control *bch) +{ + const int m = GF_M(bch); + int i, j, r; + unsigned int sum, x, y, remaining, ak = 0, xi[m]; + + /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */ + for (i = 0; i < m; i++) { + for (j = 0, sum = 0; j < m; j++) + sum ^= a_pow(bch, i*(1 << j)); + + if (sum) { + ak = bch->a_pow_tab[i]; + break; + } + } + /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */ + remaining = m; + memset(xi, 0, sizeof(xi)); + + for (x = 0; (x <= GF_N(bch)) && remaining; x++) { + y = gf_sqr(bch, x)^x; + for (i = 0; i < 2; i++) { + r = a_log(bch, y); + if (y && (r < m) && !xi[r]) { + bch->xi_tab[r] = x; + xi[r] = 1; + remaining--; + dbg("x%d = %x\n", r, x); + break; + } + y ^= ak; + } + } + /* should not happen but check anyway */ + return remaining ? -1 : 0; +} + +static void *bch_alloc(size_t size, int *err) +{ + void *ptr; + + ptr = kmalloc(size, GFP_KERNEL); + if (ptr == NULL) + *err = 1; + return ptr; +} + +/* + * compute generator polynomial for given (m,t) parameters. 
+ */ +static uint32_t *compute_generator_polynomial(struct bch_control *bch) +{ + const unsigned int m = GF_M(bch); + const unsigned int t = GF_T(bch); + int n, err = 0; + unsigned int i, j, nbits, r, word, *roots; + struct gf_poly *g; + uint32_t *genpoly; + + g = bch_alloc(GF_POLY_SZ(m*t), &err); + roots = bch_alloc((bch->n+1)*sizeof(*roots), &err); + genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err); + + if (err) { + kfree(genpoly); + genpoly = NULL; + goto finish; + } + + /* enumerate all roots of g(X) */ + memset(roots , 0, (bch->n+1)*sizeof(*roots)); + for (i = 0; i < t; i++) { + for (j = 0, r = 2*i+1; j < m; j++) { + roots[r] = 1; + r = mod_s(bch, 2*r); + } + } + /* build generator polynomial g(X) */ + g->deg = 0; + g->c[0] = 1; + for (i = 0; i < GF_N(bch); i++) { + if (roots[i]) { + /* multiply g(X) by (X+root) */ + r = bch->a_pow_tab[i]; + g->c[g->deg+1] = 1; + for (j = g->deg; j > 0; j--) + g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1]; + + g->c[0] = gf_mul(bch, g->c[0], r); + g->deg++; + } + } + /* store left-justified binary representation of g(X) */ + n = g->deg+1; + i = 0; + + while (n > 0) { + nbits = (n > 32) ? 32 : n; + for (j = 0, word = 0; j < nbits; j++) { + if (g->c[n-1-j]) + word |= 1u << (31-j); + } + genpoly[i++] = word; + n -= nbits; + } + bch->ecc_bits = g->deg; + +finish: + kfree(g); + kfree(roots); + + return genpoly; +} + +/** + * init_bch - initialize a BCH encoder/decoder + * @m: Galois field order, should be in the range 5-15 + * @t: maximum error correction capability, in bits + * @prim_poly: user-provided primitive polynomial (or 0 to use default) + * + * Returns: + * a newly allocated BCH control structure if successful, NULL otherwise + * + * This initialization can take some time, as lookup tables are built for fast + * encoding/decoding; make sure not to call this function from a time critical + * path. Usually, init_bch() should be called on module/driver init and + * free_bch() should be called to release memory on exit. + * + * You may provide your own primitive polynomial of degree @m in argument + * @prim_poly, or let init_bch() use its default polynomial. + * + * Once init_bch() has successfully returned a pointer to a newly allocated + * BCH control structure, ecc length in bytes is given by member @ecc_bytes of + * the structure. 
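
To make the BCH API documented above concrete, here is a minimal, hypothetical sketch of the init_bch()/encode_bch()/decode_bch()/free_bch() sequence a NAND driver might use; the (m, t) values, the 512-byte sector assumption and the function name are illustrative only and not part of this patch.

```c
#include <linux/bch.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Correct up to t bit errors in a sector of at most 512 bytes (m=13, t=4). */
static int bch_correct_sector(uint8_t *data, unsigned int len,
			      const uint8_t *read_ecc)
{
	struct bch_control *bch;
	uint8_t *calc_ecc;
	unsigned int *errloc;
	int i, nerr = -ENOMEM;

	bch = init_bch(13, 4, 0);	/* 0: use the default primitive polynomial */
	if (!bch)
		return -EINVAL;

	calc_ecc = kzalloc(bch->ecc_bytes, GFP_KERNEL);	/* must start zeroed */
	errloc = kmalloc(bch->t * sizeof(*errloc), GFP_KERNEL);
	if (!calc_ecc || !errloc)
		goto out;

	/* recompute ecc over the data as read */
	encode_bch(bch, data, len, calc_ecc);

	/* locate bit errors by comparing received and calculated ecc */
	nerr = decode_bch(bch, NULL, len, read_ecc, calc_ecc, NULL, errloc);

	/* flip erroneous data bits; locations >= 8*len lie in the ecc itself */
	for (i = 0; i < nerr; i++)
		if (errloc[i] < 8 * len)
			data[errloc[i] / 8] ^= 1 << (errloc[i] % 8);
out:
	kfree(errloc);
	kfree(calc_ecc);
	free_bch(bch);
	return nerr;
}
```
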
+ */ +struct bch_control *init_bch(int m, int t, unsigned int prim_poly) +{ + int err = 0; + unsigned int i, words; + uint32_t *genpoly; + struct bch_control *bch = NULL; + + const int min_m = 5; + const int max_m = 15; + + /* default primitive polynomials */ + static const unsigned int prim_poly_tab[] = { + 0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b, + 0x402b, 0x8003, + }; + +#if defined(CONFIG_BCH_CONST_PARAMS) + if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) { + printk(KERN_ERR "bch encoder/decoder was configured to support " + "parameters m=%d, t=%d only!\n", + CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T); + goto fail; + } +#endif + if ((m < min_m) || (m > max_m)) + /* + * values of m greater than 15 are not currently supported; + * supporting m > 15 would require changing table base type + * (uint16_t) and a small patch in matrix transposition + */ + goto fail; + + /* sanity checks */ + if ((t < 1) || (m*t >= ((1 << m)-1))) + /* invalid t value */ + goto fail; + + /* select a primitive polynomial for generating GF(2^m) */ + if (prim_poly == 0) + prim_poly = prim_poly_tab[m-min_m]; + + bch = kzalloc(sizeof(*bch), GFP_KERNEL); + if (bch == NULL) + goto fail; + + bch->m = m; + bch->t = t; + bch->n = (1 << m)-1; + words = DIV_ROUND_UP(m*t, 32); + bch->ecc_bytes = DIV_ROUND_UP(m*t, 8); + bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err); + bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err); + bch->mod8_tab = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err); + bch->ecc_buf = bch_alloc(words*sizeof(*bch->ecc_buf), &err); + bch->ecc_buf2 = bch_alloc(words*sizeof(*bch->ecc_buf2), &err); + bch->xi_tab = bch_alloc(m*sizeof(*bch->xi_tab), &err); + bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err); + bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err); + bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err); + + for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++) + bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err); + + if (err) + goto fail; + + err = build_gf_tables(bch, prim_poly); + if (err) + goto fail; + + /* use generator polynomial for computing encoding tables */ + genpoly = compute_generator_polynomial(bch); + if (genpoly == NULL) + goto fail; + + build_mod8_tables(bch, genpoly); + kfree(genpoly); + + err = build_deg2_base(bch); + if (err) + goto fail; + + return bch; + +fail: + free_bch(bch); + return NULL; +} +EXPORT_SYMBOL_GPL(init_bch); + +/** + * free_bch - free the BCH control structure + * @bch: BCH control structure to release + */ +void free_bch(struct bch_control *bch) +{ + unsigned int i; + + if (bch) { + kfree(bch->a_pow_tab); + kfree(bch->a_log_tab); + kfree(bch->mod8_tab); + kfree(bch->ecc_buf); + kfree(bch->ecc_buf2); + kfree(bch->xi_tab); + kfree(bch->syn); + kfree(bch->cache); + kfree(bch->elp); + + for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++) + kfree(bch->poly_2t[i]); + + kfree(bch); + } +} +EXPORT_SYMBOL_GPL(free_bch); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>"); +MODULE_DESCRIPTION("Binary BCH encoder/decoder"); diff --git a/lib/bitmap.c b/lib/bitmap.c index 741fae905ae3..91e0ccfdb424 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -830,7 +830,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * @orig (i.e. bits 3, 5, 7 and 9) were also set. * * When bit 11 is set in @orig, it means turn on the bit in - * @dst corresponding to whatever is the twelth bit that is + * @dst corresponding to whatever is the twelfth bit that is * turned on in @relmap. 
In the above example, there were * only ten bits turned on in @relmap (30..39), so that bit * 11 was set in @orig had no affect on @dst. diff --git a/lib/btree.c b/lib/btree.c index c9c6f0351526..2a34392bcecc 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -11,7 +11,7 @@ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch * * A relatively simple B+Tree implementation. I have written it as a learning - * excercise to understand how B+Trees work. Turned out to be useful as well. + * exercise to understand how B+Trees work. Turned out to be useful as well. * * B+Trees can be used similar to Linux radix trees (which don't have anything * in common with textbook radix trees, beware). Prerequisite for them working @@ -541,7 +541,7 @@ static void rebalance(struct btree_head *head, struct btree_geo *geo, int i, no_left, no_right; if (fill == 0) { - /* Because we don't steal entries from a neigbour, this case + /* Because we don't steal entries from a neighbour, this case * can happen. Parent node contains a single child, this * node, so merging with a sibling never happens. */ diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c new file mode 100644 index 000000000000..987acfafeb83 --- /dev/null +++ b/lib/cpu_rmap.c @@ -0,0 +1,269 @@ +/* + * cpu_rmap.c: CPU affinity reverse-map support + * Copyright 2011 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <linux/cpu_rmap.h> +#ifdef CONFIG_GENERIC_HARDIRQS +#include <linux/interrupt.h> +#endif +#include <linux/module.h> + +/* + * These functions maintain a mapping from CPUs to some ordered set of + * objects with CPU affinities. This can be seen as a reverse-map of + * CPU affinity. However, we do not assume that the object affinities + * cover all CPUs in the system. For those CPUs not directly covered + * by object affinities, we attempt to find a nearest object based on + * CPU topology. + */ + +/** + * alloc_cpu_rmap - allocate CPU affinity reverse-map + * @size: Number of objects to be mapped + * @flags: Allocation flags e.g. %GFP_KERNEL + */ +struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) +{ + struct cpu_rmap *rmap; + unsigned int cpu; + size_t obj_offset; + + /* This is a silly number of objects, and we use u16 indices. */ + if (size > 0xffff) + return NULL; + + /* Offset of object pointer array from base structure */ + obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]), + sizeof(void *)); + + rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags); + if (!rmap) + return NULL; + + rmap->obj = (void **)((char *)rmap + obj_offset); + + /* Initially assign CPUs to objects on a rota, since we have + * no idea where the objects are. Use infinite distance, so + * any object with known distance is preferable. Include the + * CPUs that are not present/online, since we definitely want + * any newly-hotplugged CPUs to have some object assigned. + */ + for_each_possible_cpu(cpu) { + rmap->near[cpu].index = cpu % size; + rmap->near[cpu].dist = CPU_RMAP_DIST_INF; + } + + rmap->size = size; + return rmap; +} +EXPORT_SYMBOL(alloc_cpu_rmap); + +/* Reevaluate nearest object for given CPU, comparing with the given + * neighbours at the given distance. 
+ */ +static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu, + const struct cpumask *mask, u16 dist) +{ + int neigh; + + for_each_cpu(neigh, mask) { + if (rmap->near[cpu].dist > dist && + rmap->near[neigh].dist <= dist) { + rmap->near[cpu].index = rmap->near[neigh].index; + rmap->near[cpu].dist = dist; + return true; + } + } + return false; +} + +#ifdef DEBUG +static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix) +{ + unsigned index; + unsigned int cpu; + + pr_info("cpu_rmap %p, %s:\n", rmap, prefix); + + for_each_possible_cpu(cpu) { + index = rmap->near[cpu].index; + pr_info("cpu %d -> obj %u (distance %u)\n", + cpu, index, rmap->near[cpu].dist); + } +} +#else +static inline void +debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix) +{ +} +#endif + +/** + * cpu_rmap_add - add object to a rmap + * @rmap: CPU rmap allocated with alloc_cpu_rmap() + * @obj: Object to add to rmap + * + * Return index of object. + */ +int cpu_rmap_add(struct cpu_rmap *rmap, void *obj) +{ + u16 index; + + BUG_ON(rmap->used >= rmap->size); + index = rmap->used++; + rmap->obj[index] = obj; + return index; +} +EXPORT_SYMBOL(cpu_rmap_add); + +/** + * cpu_rmap_update - update CPU rmap following a change of object affinity + * @rmap: CPU rmap to update + * @index: Index of object whose affinity changed + * @affinity: New CPU affinity of object + */ +int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, + const struct cpumask *affinity) +{ + cpumask_var_t update_mask; + unsigned int cpu; + + if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL))) + return -ENOMEM; + + /* Invalidate distance for all CPUs for which this used to be + * the nearest object. Mark those CPUs for update. + */ + for_each_online_cpu(cpu) { + if (rmap->near[cpu].index == index) { + rmap->near[cpu].dist = CPU_RMAP_DIST_INF; + cpumask_set_cpu(cpu, update_mask); + } + } + + debug_print_rmap(rmap, "after invalidating old distances"); + + /* Set distance to 0 for all CPUs in the new affinity mask. + * Mark all CPUs within their NUMA nodes for update. + */ + for_each_cpu(cpu, affinity) { + rmap->near[cpu].index = index; + rmap->near[cpu].dist = 0; + cpumask_or(update_mask, update_mask, + cpumask_of_node(cpu_to_node(cpu))); + } + + debug_print_rmap(rmap, "after updating neighbours"); + + /* Update distances based on topology */ + for_each_cpu(cpu, update_mask) { + if (cpu_rmap_copy_neigh(rmap, cpu, + topology_thread_cpumask(cpu), 1)) + continue; + if (cpu_rmap_copy_neigh(rmap, cpu, + topology_core_cpumask(cpu), 2)) + continue; + if (cpu_rmap_copy_neigh(rmap, cpu, + cpumask_of_node(cpu_to_node(cpu)), 3)) + continue; + /* We could continue into NUMA node distances, but for now + * we give up. + */ + } + + debug_print_rmap(rmap, "after copying neighbours"); + + free_cpumask_var(update_mask); + return 0; +} +EXPORT_SYMBOL(cpu_rmap_update); + +#ifdef CONFIG_GENERIC_HARDIRQS + +/* Glue between IRQ affinity notifiers and CPU rmaps */ + +struct irq_glue { + struct irq_affinity_notify notify; + struct cpu_rmap *rmap; + u16 index; +}; + +/** + * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs + * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL + * + * Must be called in process context, before freeing the IRQs, and + * without holding any locks required by global workqueue items. 
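
As an illustration of how the new reverse-map is meant to be consumed, here is a hypothetical sketch of a multiqueue driver wiring its per-queue RX interrupts into a cpu_rmap with alloc_cpu_rmap() above and irq_cpu_rmap_add()/free_irq_cpu_rmap() shown further below; the driver structure and names are assumptions, not part of this patch.

```c
#include <linux/cpu_rmap.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static struct cpu_rmap *rx_cpu_rmap;

/* queue_irqs[] holds one IRQ number per RX queue (hypothetical layout) */
static int setup_rx_cpu_rmap(const int *queue_irqs, unsigned int nqueues)
{
	unsigned int i;
	int rc;

	rx_cpu_rmap = alloc_cpu_rmap(nqueues, GFP_KERNEL);
	if (!rx_cpu_rmap)
		return -ENOMEM;

	for (i = 0; i < nqueues; i++) {
		/* registers an affinity notifier that keeps the rmap updated */
		rc = irq_cpu_rmap_add(rx_cpu_rmap, queue_irqs[i]);
		if (rc)
			return rc;	/* error unwinding elided for brevity */
	}
	return 0;
}

static void teardown_rx_cpu_rmap(void)
{
	/* must run before the IRQs themselves are freed */
	free_irq_cpu_rmap(rx_cpu_rmap);
	rx_cpu_rmap = NULL;
}
```
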
+ */ +void free_irq_cpu_rmap(struct cpu_rmap *rmap) +{ + struct irq_glue *glue; + u16 index; + + if (!rmap) + return; + + for (index = 0; index < rmap->used; index++) { + glue = rmap->obj[index]; + irq_set_affinity_notifier(glue->notify.irq, NULL); + } + irq_run_affinity_notifiers(); + + kfree(rmap); +} +EXPORT_SYMBOL(free_irq_cpu_rmap); + +static void +irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) +{ + struct irq_glue *glue = + container_of(notify, struct irq_glue, notify); + int rc; + + rc = cpu_rmap_update(glue->rmap, glue->index, mask); + if (rc) + pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); +} + +static void irq_cpu_rmap_release(struct kref *ref) +{ + struct irq_glue *glue = + container_of(ref, struct irq_glue, notify.kref); + kfree(glue); +} + +/** + * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map + * @rmap: The reverse-map + * @irq: The IRQ number + * + * This adds an IRQ affinity notifier that will update the reverse-map + * automatically. + * + * Must be called in process context, after the IRQ is allocated but + * before it is bound with request_irq(). + */ +int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) +{ + struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL); + int rc; + + if (!glue) + return -ENOMEM; + glue->notify.notify = irq_cpu_rmap_notify; + glue->notify.release = irq_cpu_rmap_release; + glue->rmap = rmap; + glue->index = cpu_rmap_add(rmap, glue); + rc = irq_set_affinity_notifier(irq, &glue->notify); + if (rc) + kfree(glue); + return rc; +} +EXPORT_SYMBOL(irq_cpu_rmap_add); + +#endif /* CONFIG_GENERIC_HARDIRQS */ diff --git a/lib/debugobjects.c b/lib/debugobjects.c index deebcc57d4e6..9d86e45086f5 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr) static void debug_print_object(struct debug_obj *obj, char *msg) { + struct debug_obj_descr *descr = obj->descr; static int limit; - if (limit < 5 && obj->descr != descr_test) { + if (limit < 5 && descr != descr_test) { + void *hint = descr->debug_hint ? 
+ descr->debug_hint(obj->object) : NULL; limit++; WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " - "object type: %s\n", + "object type: %s hint: %pS\n", msg, obj_states[obj->state], obj->astate, - obj->descr->name); + descr->name, hint); } debug_objects_warnings++; } diff --git a/lib/decompress.c b/lib/decompress.c index a7606815541f..3d766b7f60ab 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -8,6 +8,7 @@ #include <linux/decompress/bunzip2.h> #include <linux/decompress/unlzma.h> +#include <linux/decompress/unxz.h> #include <linux/decompress/inflate.h> #include <linux/decompress/unlzo.h> @@ -23,6 +24,9 @@ #ifndef CONFIG_DECOMPRESS_LZMA # define unlzma NULL #endif +#ifndef CONFIG_DECOMPRESS_XZ +# define unxz NULL +#endif #ifndef CONFIG_DECOMPRESS_LZO # define unlzo NULL #endif @@ -36,6 +40,7 @@ static const struct compress_format { { {037, 0236}, "gzip", gunzip }, { {0x42, 0x5a}, "bzip2", bunzip2 }, { {0x5d, 0x00}, "lzma", unlzma }, + { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 81c8bb1cc6aa..a7b80c1d6a0d 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -49,7 +49,6 @@ #define PREBOOT #else #include <linux/decompress/bunzip2.h> -#include <linux/slab.h> #endif /* STATIC */ #include <linux/decompress/mm.h> @@ -682,13 +681,12 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, int(*flush)(void*, unsigned int), unsigned char *outbuf, int *pos, - void(*error_fn)(char *x)) + void(*error)(char *x)) { struct bunzip_data *bd; int i = -1; unsigned char *inbuf; - set_error_fn(error_fn); if (flush) outbuf = malloc(BZIP2_IOBUF_SIZE); @@ -751,8 +749,8 @@ STATIC int INIT decompress(unsigned char *buf, int len, int(*flush)(void*, unsigned int), unsigned char *outbuf, int *pos, - void(*error_fn)(char *x)) + void(*error)(char *x)) { - return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn); + return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); } #endif diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index fc686c7a0a0d..19ff89e34eec 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -19,7 +19,6 @@ #include "zlib_inflate/inflate.h" #include "zlib_inflate/infutil.h" -#include <linux/slab.h> #endif /* STATIC */ @@ -27,7 +26,7 @@ #define GZIP_IOBUF_SIZE (16*1024) -static int nofill(void *buffer, unsigned int len) +static int INIT nofill(void *buffer, unsigned int len) { return -1; } @@ -38,13 +37,12 @@ STATIC int INIT gunzip(unsigned char *buf, int len, int(*flush)(void*, unsigned int), unsigned char *out_buf, int *pos, - void(*error_fn)(char *x)) { + void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; int rc; size_t out_len; - set_error_fn(error_fn); rc = -1; if (flush) { out_len = 0x8000; /* 32 K */ @@ -100,13 +98,22 @@ STATIC int INIT gunzip(unsigned char *buf, int len, * possible asciz filename) */ strm->next_in = zbuf + 10; + strm->avail_in = len - 10; /* skip over asciz filename */ if (zbuf[3] & 0x8) { - while (strm->next_in[0]) - strm->next_in++; - strm->next_in++; + do { + /* + * If the filename doesn't fit into the buffer, + * the file is very probably corrupt. Don't try + * to read more data. 
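The compress_format table added above dispatches purely on the first two bytes of the image (the gzip pair 037/0236 is octal). Below is a minimal standalone sketch of the same two-byte lookup, using the magic values from that table; the identify() helper and the sample XZ header are illustrative and not part of the kernel API.

#include <stdio.h>

struct compress_format {
	unsigned char magic[2];
	const char *name;
};

static const struct compress_format formats[] = {
	{ { 037, 0236 },  "gzip"  },
	{ { 0x42, 0x5a }, "bzip2" },
	{ { 0x5d, 0x00 }, "lzma"  },
	{ { 0xfd, 0x37 }, "xz"    },
	{ { 0x89, 0x4c }, "lzo"   },
	{ { 0, 0 }, NULL },
};

/* Return the format name matching the first two bytes, or NULL. */
static const char *identify(const unsigned char *buf, int len)
{
	const struct compress_format *cf;

	if (len < 2)
		return NULL;
	for (cf = formats; cf->name; cf++)
		if (buf[0] == cf->magic[0] && buf[1] == cf->magic[1])
			return cf->name;
	return NULL;
}

int main(void)
{
	const unsigned char xz_hdr[] = { 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00 };
	const char *name = identify(xz_hdr, sizeof(xz_hdr));

	printf("detected: %s\n", name ? name : "unknown");
	return 0;
}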
+ */ + if (strm->avail_in == 0) { + error("header error"); + goto gunzip_5; + } + --strm->avail_in; + } while (*strm->next_in++); } - strm->avail_in = len - (strm->next_in - zbuf); strm->next_out = out_buf; strm->avail_out = out_len; diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index ca82fde81c8f..476c65af9709 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -33,7 +33,6 @@ #define PREBOOT #else #include <linux/decompress/unlzma.h> -#include <linux/slab.h> #endif /* STATIC */ #include <linux/decompress/mm.h> @@ -74,6 +73,7 @@ struct rc { uint32_t code; uint32_t range; uint32_t bound; + void (*error)(char *); }; @@ -82,7 +82,7 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 -static int nofill(void *buffer, unsigned int len) +static int INIT nofill(void *buffer, unsigned int len) { return -1; } @@ -92,7 +92,7 @@ static void INIT rc_read(struct rc *rc) { rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); if (rc->buffer_size <= 0) - error("unexpected EOF"); + rc->error("unexpected EOF"); rc->ptr = rc->buffer; rc->buffer_end = rc->buffer + rc->buffer_size; } @@ -127,12 +127,6 @@ static inline void INIT rc_init_code(struct rc *rc) } -/* Called once. TODO: bb_maybe_free() */ -static inline void INIT rc_free(struct rc *rc) -{ - free(rc->buffer); -} - /* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ static void INIT rc_do_normalize(struct rc *rc) { @@ -169,7 +163,7 @@ static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) rc->range = rc->bound; *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; } -static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) +static inline void INIT rc_update_bit_1(struct rc *rc, uint16_t *p) { rc->range -= rc->bound; rc->code -= rc->bound; @@ -319,32 +313,38 @@ static inline uint8_t INIT peek_old_byte(struct writer *wr, } -static inline void INIT write_byte(struct writer *wr, uint8_t byte) +static inline int INIT write_byte(struct writer *wr, uint8_t byte) { wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; if (wr->flush && wr->buffer_pos == wr->header->dict_size) { wr->buffer_pos = 0; wr->global_pos += wr->header->dict_size; - wr->flush((char *)wr->buffer, wr->header->dict_size); + if (wr->flush((char *)wr->buffer, wr->header->dict_size) + != wr->header->dict_size) + return -1; } + return 0; } -static inline void INIT copy_byte(struct writer *wr, uint32_t offs) +static inline int INIT copy_byte(struct writer *wr, uint32_t offs) { - write_byte(wr, peek_old_byte(wr, offs)); + return write_byte(wr, peek_old_byte(wr, offs)); } -static inline void INIT copy_bytes(struct writer *wr, +static inline int INIT copy_bytes(struct writer *wr, uint32_t rep0, int len) { do { - copy_byte(wr, rep0); + if (copy_byte(wr, rep0)) + return -1; len--; } while (len != 0 && wr->buffer_pos < wr->header->dst_size); + + return len; } -static inline void INIT process_bit0(struct writer *wr, struct rc *rc, +static inline int INIT process_bit0(struct writer *wr, struct rc *rc, struct cstate *cst, uint16_t *p, int pos_state, uint16_t *prob, int lc, uint32_t literal_pos_mask) { @@ -378,16 +378,17 @@ static inline void INIT process_bit0(struct writer *wr, struct rc *rc, uint16_t *prob_lit = prob + mi; rc_get_bit(rc, prob_lit, &mi); } - write_byte(wr, mi); if (cst->state < 4) cst->state = 0; else if (cst->state < 10) cst->state -= 3; else cst->state -= 6; + + return write_byte(wr, mi); } -static inline void INIT process_bit1(struct writer *wr, struct rc *rc, +static inline int INIT 
process_bit1(struct writer *wr, struct rc *rc, struct cstate *cst, uint16_t *p, int pos_state, uint16_t *prob) { int offset; @@ -418,8 +419,7 @@ static inline void INIT process_bit1(struct writer *wr, struct rc *rc, cst->state = cst->state < LZMA_NUM_LIT_STATES ? 9 : 11; - copy_byte(wr, cst->rep0); - return; + return copy_byte(wr, cst->rep0); } else { rc_update_bit_1(rc, prob); } @@ -521,12 +521,15 @@ static inline void INIT process_bit1(struct writer *wr, struct rc *rc, } else cst->rep0 = pos_slot; if (++(cst->rep0) == 0) - return; + return 0; + if (cst->rep0 > wr->header->dict_size + || cst->rep0 > get_pos(wr)) + return -1; } len += LZMA_MATCH_MIN_LEN; - copy_bytes(wr, cst->rep0, len); + return copy_bytes(wr, cst->rep0, len); } @@ -536,7 +539,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len, int(*flush)(void*, unsigned int), unsigned char *output, int *posp, - void(*error_fn)(char *x) + void(*error)(char *x) ) { struct lzma_header header; @@ -552,7 +555,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len, unsigned char *inbuf; int ret = -1; - set_error_fn(error_fn); + rc.error = error; if (buf) inbuf = buf; @@ -580,8 +583,10 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len, ((unsigned char *)&header)[i] = *rc.ptr++; } - if (header.pos >= (9 * 5 * 5)) + if (header.pos >= (9 * 5 * 5)) { error("bad header"); + goto exit_1; + } mi = 0; lc = header.pos; @@ -627,21 +632,29 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len, int pos_state = get_pos(&wr) & pos_state_mask; uint16_t *prob = p + LZMA_IS_MATCH + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; - if (rc_is_bit_0(&rc, prob)) - process_bit0(&wr, &rc, &cst, p, pos_state, prob, - lc, literal_pos_mask); - else { - process_bit1(&wr, &rc, &cst, p, pos_state, prob); + if (rc_is_bit_0(&rc, prob)) { + if (process_bit0(&wr, &rc, &cst, p, pos_state, prob, + lc, literal_pos_mask)) { + error("LZMA data is corrupt"); + goto exit_3; + } + } else { + if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) { + error("LZMA data is corrupt"); + goto exit_3; + } if (cst.rep0 == 0) break; } + if (rc.buffer_size <= 0) + goto exit_3; } if (posp) *posp = rc.ptr-rc.buffer; - if (wr.flush) - wr.flush(wr.buffer, wr.buffer_pos); - ret = 0; + if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos) + ret = 0; +exit_3: large_free(p); exit_2: if (!output) @@ -659,9 +672,9 @@ STATIC int INIT decompress(unsigned char *buf, int in_len, int(*flush)(void*, unsigned int), unsigned char *output, int *posp, - void(*error_fn)(char *x) + void(*error)(char *x) ) { - return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn); + return unlzma(buf, in_len - 4, fill, flush, output, posp, error); } #endif diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index bcb3a4bd68ff..5a7a2adf4c4c 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -33,7 +33,6 @@ #ifdef STATIC #include "lzo/lzo1x_decompress.c" #else -#include <linux/slab.h> #include <linux/decompress/unlzo.h> #endif @@ -49,14 +48,25 @@ static const unsigned char lzop_magic[] = { #define LZO_BLOCK_SIZE (256*1024l) #define HEADER_HAS_FILTER 0x00000800L +#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4) +#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4) -STATIC inline int INIT parse_header(u8 *input, u8 *skip) +STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) { int l; u8 *parse = input; + u8 *end = input + in_len; u8 level = 0; u16 version; + /* + * Check that there's enough input to possibly 
have a valid header. + * Then it is possible to parse several fields until the minimum + * size may have been used. + */ + if (in_len < HEADER_SIZE_MIN) + return 0; + /* read magic: 9 first bits */ for (l = 0; l < 9; l++) { if (*parse++ != lzop_magic[l]) @@ -74,6 +84,15 @@ STATIC inline int INIT parse_header(u8 *input, u8 *skip) else parse += 4; /* flags */ + /* + * At least mode, mtime_low, filename length, and checksum must + * be left to be parsed. If also mtime_high is present, it's OK + * because the next input buffer check is after reading the + * filename length. + */ + if (end - parse < 8 + 1 + 4) + return 0; + /* skip mode and mtime_low */ parse += 8; if (version >= 0x0940) @@ -81,6 +100,8 @@ STATIC inline int INIT parse_header(u8 *input, u8 *skip) l = *parse++; /* don't care about the file name, and skip checksum */ + if (end - parse < l + 4) + return 0; parse += l + 4; *skip = parse - input; @@ -91,16 +112,15 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, int (*fill) (void *, unsigned int), int (*flush) (void *, unsigned int), u8 *output, int *posp, - void (*error_fn) (char *x)) + void (*error) (char *x)) { - u8 skip = 0, r = 0; + u8 r = 0; + int skip = 0; u32 src_len, dst_len; size_t tmp; u8 *in_buf, *in_buf_save, *out_buf; int ret = -1; - set_error_fn(error_fn); - if (output) { out_buf = output; } else if (!flush) { @@ -119,8 +139,8 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, goto exit_1; } else if (input) { in_buf = input; - } else if (!fill || !posp) { - error("NULL input pointer and missing position pointer or fill function"); + } else if (!fill) { + error("NULL input pointer and missing fill function"); goto exit_1; } else { in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); @@ -134,22 +154,47 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, if (posp) *posp = 0; - if (fill) - fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + if (fill) { + /* + * Start from in_buf + HEADER_SIZE_MAX to make it possible + * to use memcpy() to copy the unused data to the beginning + * of the buffer. This way memmove() isn't needed which + * is missing from pre-boot environments of most archs. + */ + in_buf += HEADER_SIZE_MAX; + in_len = fill(in_buf, HEADER_SIZE_MAX); + } - if (!parse_header(input, &skip)) { + if (!parse_header(in_buf, &skip, in_len)) { error("invalid header"); goto exit_2; } in_buf += skip; + in_len -= skip; + + if (fill) { + /* Move the unused data to the beginning of the buffer. 
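The reworked unlzo() path keeps an explicit in_len and calls fill() to top the buffer up before every fixed-size read, so truncated input is reported as "file corrupted" instead of being read past the end of the buffer. A rough userspace sketch of that ensure-then-consume pattern follows; the fill_from_blob() callback, buffer sizes and test data are stand-ins, not the decompress_unlzo interfaces.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IOBUF_SIZE 4096

/* Stand-in fill() callback: drains a static test blob instead of a device. */
static const uint8_t blob[] = { 0x00, 0x01, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef };
static size_t blob_pos;

static int fill_from_blob(void *dest, unsigned int size)
{
	unsigned int n = (unsigned int)(sizeof(blob) - blob_pos);

	if (n > size)
		n = size;
	memcpy(dest, blob + blob_pos, n);
	blob_pos += n;
	return (int)n;
}

/* Big-endian 32-bit read, in the spirit of get_unaligned_be32(). */
static uint32_t be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

/* Top the buffer up until it holds at least 'need' bytes, or fail. */
static int ensure(uint8_t *buf, int *len, int need)
{
	while (*len < need) {
		int got = fill_from_blob(buf + *len, (unsigned int)(need - *len));

		if (got <= 0)
			return -1;	/* truncated input */
		*len += got;
	}
	return 0;
}

int main(void)
{
	uint8_t buf[IOBUF_SIZE];
	int len = 0;

	if (ensure(buf, &len, 4)) {
		fprintf(stderr, "file corrupted\n");
		return 1;
	}
	printf("block size field: %u\n", (unsigned int)be32(buf));
	return 0;
}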
*/ + memcpy(in_buf_save, in_buf, in_len); + in_buf = in_buf_save; + } if (posp) *posp = skip; for (;;) { /* read uncompressed block size */ + if (fill && in_len < 4) { + skip = fill(in_buf + in_len, 4 - in_len); + if (skip > 0) + in_len += skip; + } + if (in_len < 4) { + error("file corrupted"); + goto exit_2; + } dst_len = get_unaligned_be32(in_buf); in_buf += 4; + in_len -= 4; /* exit if last block */ if (dst_len == 0) { @@ -164,8 +209,18 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, } /* read compressed block size, and skip block checksum info */ + if (fill && in_len < 8) { + skip = fill(in_buf + in_len, 8 - in_len); + if (skip > 0) + in_len += skip; + } + if (in_len < 8) { + error("file corrupted"); + goto exit_2; + } src_len = get_unaligned_be32(in_buf); in_buf += 8; + in_len -= 8; if (src_len <= 0 || src_len > dst_len) { error("file corrupted"); @@ -173,6 +228,15 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, } /* decompress */ + if (fill && in_len < src_len) { + skip = fill(in_buf + in_len, src_len - in_len); + if (skip > 0) + in_len += skip; + } + if (in_len < src_len) { + error("file corrupted"); + goto exit_2; + } tmp = dst_len; /* When the input data is not compressed at all, @@ -190,17 +254,26 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, } } - if (flush) - flush(out_buf, dst_len); + if (flush && flush(out_buf, dst_len) != dst_len) + goto exit_2; if (output) out_buf += dst_len; if (posp) *posp += src_len + 12; + + in_buf += src_len; + in_len -= src_len; if (fill) { + /* + * If there happens to still be unused data left in + * in_buf, move it to the beginning of the buffer. + * Use a loop to avoid memmove() dependency. + */ + if (in_len > 0) + for (skip = 0; skip < in_len; ++skip) + in_buf_save[skip] = in_buf[skip]; in_buf = in_buf_save; - fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); - } else - in_buf += src_len; + } } ret = 0; diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c new file mode 100644 index 000000000000..9f34eb56854d --- /dev/null +++ b/lib/decompress_unxz.c @@ -0,0 +1,397 @@ +/* + * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd + * + * Author: Lasse Collin <lasse.collin@tukaani.org> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +/* + * Important notes about in-place decompression + * + * At least on x86, the kernel is decompressed in place: the compressed data + * is placed to the end of the output buffer, and the decompressor overwrites + * most of the compressed data. There must be enough safety margin to + * guarantee that the write position is always behind the read position. + * + * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below. + * Note that the margin with XZ is bigger than with Deflate (gzip)! + * + * The worst case for in-place decompression is that the beginning of + * the file is compressed extremely well, and the rest of the file is + * uncompressible. Thus, we must look for worst-case expansion when the + * compressor is encoding uncompressible data. + * + * The structure of the .xz file in case of a compresed kernel is as follows. + * Sizes (as bytes) of the fields are in parenthesis. + * + * Stream Header (12) + * Block Header: + * Block Header (8-12) + * Compressed Data (N) + * Block Padding (0-3) + * CRC32 (4) + * Index (8-20) + * Stream Footer (12) + * + * Normally there is exactly one Block, but let's assume that there are + * 2-4 Blocks just in case. 
Because Stream Header and also Block Header + * of the first Block don't make the decompressor produce any uncompressed + * data, we can ignore them from our calculations. Block Headers of possible + * additional Blocks have to be taken into account still. With these + * assumptions, it is safe to assume that the total header overhead is + * less than 128 bytes. + * + * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ + * doesn't change the size of the data, it is enough to calculate the + * safety margin for LZMA2. + * + * LZMA2 stores the data in chunks. Each chunk has a header whose size is + * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that + * the maximum chunk header size is 8 bytes. After the chunk header, there + * may be up to 64 KiB of actual payload in the chunk. Often the payload is + * quite a bit smaller though; to be safe, let's assume that an average + * chunk has only 32 KiB of payload. + * + * The maximum uncompressed size of the payload is 2 MiB. The minimum + * uncompressed size of the payload is in practice never less than the + * payload size itself. The LZMA2 format would allow uncompressed size + * to be less than the payload size, but no sane compressor creates such + * files. LZMA2 supports storing uncompressible data in uncompressed form, + * so there's never a need to create payloads whose uncompressed size is + * smaller than the compressed size. + * + * The assumption, that the uncompressed size of the payload is never + * smaller than the payload itself, is valid only when talking about + * the payload as a whole. It is possible that the payload has parts where + * the decompressor consumes more input than it produces output. Calculating + * the worst case for this would be tricky. Instead of trying to do that, + * let's simply make sure that the decompressor never overwrites any bytes + * of the payload which it is currently reading. + * + * Now we have enough information to calculate the safety margin. We need + * - 128 bytes for the .xz file format headers; + * - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header + * per chunk, each chunk having average payload size of 32 KiB); and + * - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that + * the decompressor never overwrites anything from the LZMA2 chunk + * payload it is currently reading. + * + * We get the following formula: + * + * safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536 + * = 128 + (uncompressed_size >> 12) + 65536 + * + * For comparison, according to arch/x86/boot/compressed/misc.c, the + * equivalent formula for Deflate is this: + * + * safety_margin = 18 + (uncompressed_size >> 12) + 32768 + * + * Thus, when updating Deflate-only in-place kernel decompressor to + * support XZ, the fixed overhead has to be increased from 18+32768 bytes + * to 128+65536 bytes. + */ + +/* + * STATIC is defined to "static" if we are being built for kernel + * decompression (pre-boot code). <linux/decompress/mm.h> will define + * STATIC to empty if it wasn't already defined. Since we will need to + * know later if we are being used for kernel decompression, we define + * XZ_PREBOOT here. + */ +#ifdef STATIC +# define XZ_PREBOOT +#endif +#ifdef __KERNEL__ +# include <linux/decompress/mm.h> +#endif +#define XZ_EXTERN STATIC + +#ifndef XZ_PREBOOT +# include <linux/slab.h> +# include <linux/xz.h> +#else +/* + * Use the internal CRC32 code instead of kernel's CRC32 module, which + * is not available in early phase of booting. 
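Putting example numbers into the safety-margin formula quoted above: for a hypothetical 20 MiB uncompressed image, XZ needs 128 + (20971520 >> 12) + 65536 = 70784 bytes, against 18 + 5120 + 32768 = 37906 bytes with the Deflate formula. A trivial check of that arithmetic, with the 20 MiB figure purely an example:

#include <stdio.h>

int main(void)
{
	unsigned long uncompressed_size = 20UL * 1024 * 1024;	/* example only */
	unsigned long xz_margin = 128 + (uncompressed_size >> 12) + 65536;
	unsigned long gz_margin = 18  + (uncompressed_size >> 12) + 32768;

	/* Matches the two formulas given in the comment block above. */
	printf("xz safety margin:      %lu bytes\n", xz_margin);
	printf("deflate safety margin: %lu bytes\n", gz_margin);
	return 0;
}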
+ */ +#define XZ_INTERNAL_CRC32 1 + +/* + * For boot time use, we enable only the BCJ filter of the current + * architecture or none if no BCJ filter is available for the architecture. + */ +#ifdef CONFIG_X86 +# define XZ_DEC_X86 +#endif +#ifdef CONFIG_PPC +# define XZ_DEC_POWERPC +#endif +#ifdef CONFIG_ARM +# define XZ_DEC_ARM +#endif +#ifdef CONFIG_IA64 +# define XZ_DEC_IA64 +#endif +#ifdef CONFIG_SPARC +# define XZ_DEC_SPARC +#endif + +/* + * This will get the basic headers so that memeq() and others + * can be defined. + */ +#include "xz/xz_private.h" + +/* + * Replace the normal allocation functions with the versions from + * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL) + * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it. + * Workaround it here because the other decompressors don't need it. + */ +#undef kmalloc +#undef kfree +#undef vmalloc +#undef vfree +#define kmalloc(size, flags) malloc(size) +#define kfree(ptr) free(ptr) +#define vmalloc(size) malloc(size) +#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0) + +/* + * FIXME: Not all basic memory functions are provided in architecture-specific + * files (yet). We define our own versions here for now, but this should be + * only a temporary solution. + * + * memeq and memzero are not used much and any remotely sane implementation + * is fast enough. memcpy/memmove speed matters in multi-call mode, but + * the kernel image is decompressed in single-call mode, in which only + * memcpy speed can matter and only if there is a lot of uncompressible data + * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the + * functions below should just be kept small; it's probably not worth + * optimizing for speed. + */ + +#ifndef memeq +static bool memeq(const void *a, const void *b, size_t size) +{ + const uint8_t *x = a; + const uint8_t *y = b; + size_t i; + + for (i = 0; i < size; ++i) + if (x[i] != y[i]) + return false; + + return true; +} +#endif + +#ifndef memzero +static void memzero(void *buf, size_t size) +{ + uint8_t *b = buf; + uint8_t *e = b + size; + + while (b != e) + *b++ = '\0'; +} +#endif + +#ifndef memmove +/* Not static to avoid a conflict with the prototype in the Linux headers. */ +void *memmove(void *dest, const void *src, size_t size) +{ + uint8_t *d = dest; + const uint8_t *s = src; + size_t i; + + if (d < s) { + for (i = 0; i < size; ++i) + d[i] = s[i]; + } else if (d > s) { + i = size; + while (i-- > 0) + d[i] = s[i]; + } + + return dest; +} +#endif + +/* + * Since we need memmove anyway, would use it as memcpy too. + * Commented out for now to avoid breaking things. + */ +/* +#ifndef memcpy +# define memcpy memmove +#endif +*/ + +#include "xz/xz_crc32.c" +#include "xz/xz_dec_stream.c" +#include "xz/xz_dec_lzma2.c" +#include "xz/xz_dec_bcj.c" + +#endif /* XZ_PREBOOT */ + +/* Size of the input and output buffers in multi-call mode */ +#define XZ_IOBUF_SIZE 4096 + +/* + * This function implements the API defined in <linux/decompress/generic.h>. + * + * This wrapper will automatically choose single-call or multi-call mode + * of the native XZ decoder API. The single-call mode can be used only when + * both input and output buffers are available as a single chunk, i.e. when + * fill() and flush() won't be used. 
+ */ +STATIC int INIT unxz(unsigned char *in, int in_size, + int (*fill)(void *dest, unsigned int size), + int (*flush)(void *src, unsigned int size), + unsigned char *out, int *in_used, + void (*error)(char *x)) +{ + struct xz_buf b; + struct xz_dec *s; + enum xz_ret ret; + bool must_free_in = false; + +#if XZ_INTERNAL_CRC32 + xz_crc32_init(); +#endif + + if (in_used != NULL) + *in_used = 0; + + if (fill == NULL && flush == NULL) + s = xz_dec_init(XZ_SINGLE, 0); + else + s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1); + + if (s == NULL) + goto error_alloc_state; + + if (flush == NULL) { + b.out = out; + b.out_size = (size_t)-1; + } else { + b.out_size = XZ_IOBUF_SIZE; + b.out = malloc(XZ_IOBUF_SIZE); + if (b.out == NULL) + goto error_alloc_out; + } + + if (in == NULL) { + must_free_in = true; + in = malloc(XZ_IOBUF_SIZE); + if (in == NULL) + goto error_alloc_in; + } + + b.in = in; + b.in_pos = 0; + b.in_size = in_size; + b.out_pos = 0; + + if (fill == NULL && flush == NULL) { + ret = xz_dec_run(s, &b); + } else { + do { + if (b.in_pos == b.in_size && fill != NULL) { + if (in_used != NULL) + *in_used += b.in_pos; + + b.in_pos = 0; + + in_size = fill(in, XZ_IOBUF_SIZE); + if (in_size < 0) { + /* + * This isn't an optimal error code + * but it probably isn't worth making + * a new one either. + */ + ret = XZ_BUF_ERROR; + break; + } + + b.in_size = in_size; + } + + ret = xz_dec_run(s, &b); + + if (flush != NULL && (b.out_pos == b.out_size + || (ret != XZ_OK && b.out_pos > 0))) { + /* + * Setting ret here may hide an error + * returned by xz_dec_run(), but probably + * it's not too bad. + */ + if (flush(b.out, b.out_pos) != (int)b.out_pos) + ret = XZ_BUF_ERROR; + + b.out_pos = 0; + } + } while (ret == XZ_OK); + + if (must_free_in) + free(in); + + if (flush != NULL) + free(b.out); + } + + if (in_used != NULL) + *in_used += b.in_pos; + + xz_dec_end(s); + + switch (ret) { + case XZ_STREAM_END: + return 0; + + case XZ_MEM_ERROR: + /* This can occur only in multi-call mode. */ + error("XZ decompressor ran out of memory"); + break; + + case XZ_FORMAT_ERROR: + error("Input is not in the XZ format (wrong magic bytes)"); + break; + + case XZ_OPTIONS_ERROR: + error("Input was encoded with settings that are not " + "supported by this XZ decoder"); + break; + + case XZ_DATA_ERROR: + case XZ_BUF_ERROR: + error("XZ-compressed data is corrupt"); + break; + + default: + error("Bug in the XZ decompressor"); + break; + } + + return -1; + +error_alloc_in: + if (flush != NULL) + free(b.out); + +error_alloc_out: + xz_dec_end(s); + +error_alloc_state: + error("XZ decompressor ran out of memory"); + return -1; +} + +/* + * This macro is used by architecture-specific files to decompress + * the kernel image. + */ +#define decompress unxz diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 3094318bfea7..75ca78f3a8c9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -7,6 +7,7 @@ * Copyright (C) 2008 Jason Baron <jbaron@redhat.com> * By Greg Banks <gnb@melbourne.sgi.com> * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. + * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. */ #include <linux/kernel.h> @@ -27,6 +28,8 @@ #include <linux/debugfs.h> #include <linux/slab.h> #include <linux/jump_label.h> +#include <linux/hardirq.h> +#include <linux/sched.h> extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -63,15 +66,25 @@ static inline const char *basename(const char *path) return tail ? 
tail+1 : path; } +static struct { unsigned flag:8; char opt_char; } opt_array[] = { + { _DPRINTK_FLAGS_PRINT, 'p' }, + { _DPRINTK_FLAGS_INCL_MODNAME, 'm' }, + { _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' }, + { _DPRINTK_FLAGS_INCL_LINENO, 'l' }, + { _DPRINTK_FLAGS_INCL_TID, 't' }, +}; + /* format a string into buf[] which describes the _ddebug's flags */ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, size_t maxlen) { char *p = buf; + int i; BUG_ON(maxlen < 4); - if (dp->flags & _DPRINTK_FLAGS_PRINT) - *p++ = 'p'; + for (i = 0; i < ARRAY_SIZE(opt_array); ++i) + if (dp->flags & opt_array[i].flag) + *p++ = opt_array[i].opt_char; if (p == buf) *p++ = '-'; *p = '\0'; @@ -141,11 +154,10 @@ static void ddebug_change(const struct ddebug_query *query, else if (!dp->flags) dt->num_enabled++; dp->flags = newflags; - if (newflags) { - jump_label_enable(&dp->enabled); - } else { - jump_label_disable(&dp->enabled); - } + if (newflags) + dp->enabled = 1; + else + dp->enabled = 0; if (verbose) printk(KERN_INFO "ddebug: changed %s:%d [%s]%s %s\n", @@ -344,7 +356,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, unsigned int *maskp) { unsigned flags = 0; - int op = '='; + int op = '=', i; switch (*str) { case '+': @@ -359,13 +371,14 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, printk(KERN_INFO "%s: op='%c'\n", __func__, op); for ( ; *str ; ++str) { - switch (*str) { - case 'p': - flags |= _DPRINTK_FLAGS_PRINT; - break; - default: - return -EINVAL; + for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { + if (*str == opt_array[i].opt_char) { + flags |= opt_array[i].flag; + break; + } } + if (i < 0) + return -EINVAL; } if (flags == 0) return -EINVAL; @@ -414,6 +427,35 @@ static int ddebug_exec_query(char *query_string) return 0; } +int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) 
+{ + va_list args; + int res; + + BUG_ON(!descriptor); + BUG_ON(!fmt); + + va_start(args, fmt); + res = printk(KERN_DEBUG); + if (descriptor->flags & _DPRINTK_FLAGS_INCL_TID) { + if (in_interrupt()) + res += printk(KERN_CONT "<intr> "); + else + res += printk(KERN_CONT "[%d] ", task_pid_vnr(current)); + } + if (descriptor->flags & _DPRINTK_FLAGS_INCL_MODNAME) + res += printk(KERN_CONT "%s:", descriptor->modname); + if (descriptor->flags & _DPRINTK_FLAGS_INCL_FUNCNAME) + res += printk(KERN_CONT "%s:", descriptor->function); + if (descriptor->flags & _DPRINTK_FLAGS_INCL_LINENO) + res += printk(KERN_CONT "%d ", descriptor->lineno); + res += vprintk(fmt, args); + va_end(args); + + return res; +} +EXPORT_SYMBOL(__dynamic_pr_debug); + static __initdata char ddebug_setup_string[1024]; static __init int ddebug_setup_query(char *str) { diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 24c59ded47a0..b0a8767282bf 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -160,6 +160,7 @@ EXPORT_SYMBOL(find_first_zero_bit); #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef __BIG_ENDIAN +#ifdef CONFIG_GENERIC_FIND_BIT_LE /* include/linux/byteorder does not support "unsigned long" type */ static inline unsigned long ext2_swabp(const unsigned long * x) @@ -185,15 +186,16 @@ static inline unsigned long ext2_swab(const unsigned long y) #endif } -unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr; unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; if (offset >= size) return size; + p += BITOP_WORD(offset); size -= result; offset &= (BITS_PER_LONG - 1UL); if (offset) { @@ -226,18 +228,18 @@ found_middle: found_middle_swap: return result + ffz(ext2_swab(tmp)); } +EXPORT_SYMBOL(find_next_zero_bit_le); -EXPORT_SYMBOL(generic_find_next_zero_le_bit); - -unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr; unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; if (offset >= size) return size; + p += BITOP_WORD(offset); size -= result; offset &= (BITS_PER_LONG - 1UL); if (offset) { @@ -271,5 +273,7 @@ found_middle: found_middle_swap: return result + __ffs(ext2_swab(tmp)); } -EXPORT_SYMBOL(generic_find_next_le_bit); +EXPORT_SYMBOL(find_next_bit_le); + +#endif /* CONFIG_GENERIC_FIND_BIT_LE */ #endif /* __BIG_ENDIAN */ diff --git a/lib/flex_array.c b/lib/flex_array.c index 77a6fea7481e..854b57bd7d9d 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -23,6 +23,7 @@ #include <linux/flex_array.h> #include <linux/slab.h> #include <linux/stddef.h> +#include <linux/module.h> struct flex_array_part { char elements[FLEX_ARRAY_PART_SIZE]; @@ -103,6 +104,7 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, FLEX_ARRAY_BASE_BYTES_LEFT); return ret; } +EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) @@ -126,12 +128,14 @@ void flex_array_free_parts(struct flex_array *fa) for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) kfree(fa->parts[part_nr]); } +EXPORT_SYMBOL(flex_array_free_parts); void flex_array_free(struct flex_array *fa) { flex_array_free_parts(fa); 
kfree(fa); } +EXPORT_SYMBOL(flex_array_free); static unsigned int index_inside_part(struct flex_array *fa, unsigned int element_nr) @@ -196,6 +200,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, memcpy(dst, src, fa->element_size); return 0; } +EXPORT_SYMBOL(flex_array_put); /** * flex_array_clear - clear element in array at @element_nr @@ -223,13 +228,14 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr) memset(dst, FLEX_ARRAY_FREE, fa->element_size); return 0; } +EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -239,14 +245,24 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr) * Locking must be provided by the caller. */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; - if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) + if (!start && !nr_elements) + return 0; + if (start >= fa->total_nr_elements) + return -ENOSPC; + if (!nr_elements) + return 0; + + end = start + nr_elements - 1; + + if (end >= fa->total_nr_elements) return -ENOSPC; if (elements_fit_in_base(fa)) return 0; @@ -259,6 +275,7 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start, } return 0; } +EXPORT_SYMBOL(flex_array_prealloc); /** * flex_array_get - pull data back out of the array @@ -288,6 +305,7 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) } return &part->elements[index_inside_part(fa, element_nr)]; } +EXPORT_SYMBOL(flex_array_get); /** * flex_array_get_ptr - pull a ptr back out of the array @@ -308,6 +326,7 @@ void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) return *tmp; } +EXPORT_SYMBOL(flex_array_get_ptr); static int part_is_free(struct flex_array_part *part) { @@ -334,6 +353,8 @@ int flex_array_shrink(struct flex_array *fa) int part_nr; int ret = 0; + if (!fa->total_nr_elements) + return 0; if (elements_fit_in_base(fa)) return ret; for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { @@ -348,3 +369,4 @@ int flex_array_shrink(struct flex_array *fa) } return ret; } +EXPORT_SYMBOL(flex_array_shrink); diff --git a/lib/hexdump.c b/lib/hexdump.c index 5d7a4802c562..f5fe6ba7a3ab 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -34,6 +34,22 @@ int hex_to_bin(char ch) EXPORT_SYMBOL(hex_to_bin); /** + * hex2bin - convert an ascii hexadecimal string to its binary representation + * @dst: binary result + * @src: ascii hexadecimal string + * @count: result length + */ +void hex2bin(u8 *dst, const char *src, size_t count) +{ + while (count--) { + *dst = hex_to_bin(*src++) << 4; + *dst += hex_to_bin(*src++); + dst++; + } +} +EXPORT_SYMBOL(hex2bin); + +/** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump * 
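The new hex2bin() packs two ASCII hex digits per output byte via hex_to_bin(), high nibble first. Below is a standalone equivalent for reference; the local digit helper merely stands in for the kernel's hex_to_bin() and is not its implementation.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's hex_to_bin(). */
static int hex_to_bin(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	ch |= 0x20;	/* fold to lower case */
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	return -1;
}

/* Same shape as the new hex2bin(): high nibble first, then low nibble. */
static void hex2bin(uint8_t *dst, const char *src, size_t count)
{
	while (count--) {
		*dst = (uint8_t)(hex_to_bin(*src++) << 4);
		*dst += (uint8_t)hex_to_bin(*src++);
		dst++;
	}
}

int main(void)
{
	uint8_t out[4];
	size_t i;

	hex2bin(out, "deadbeef", sizeof(out));
	for (i = 0; i < sizeof(out); i++)
		printf("%02x", out[i]);
	printf("\n");
	return 0;
}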
@len: number of bytes in the @buf @@ -138,6 +154,7 @@ nil: } EXPORT_SYMBOL(hex_dump_to_buffer); +#ifdef CONFIG_PRINTK /** * print_hex_dump - print a text hex dump to syslog for a binary blob of data * @level: kernel log level (e.g. KERN_DEBUG) @@ -222,3 +239,4 @@ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, buf, len, true); } EXPORT_SYMBOL(print_hex_dump_bytes); +#endif diff --git a/lib/ioremap.c b/lib/ioremap.c index 5730ecd3eb66..da4e2ad74b68 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/io.h> +#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/pgtable.h> @@ -90,3 +91,4 @@ int ioremap_page_range(unsigned long addr, return err; } +EXPORT_SYMBOL_GPL(ioremap_page_range); diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c deleted file mode 100644 index b135d04aa48a..000000000000 --- a/lib/kernel_lock.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * lib/kernel_lock.c - * - * This is the traditional BKL - big kernel lock. Largely - * relegated to obsolescence, but used by various less - * important (or lazy) subsystems. - */ -#include <linux/module.h> -#include <linux/kallsyms.h> -#include <linux/semaphore.h> -#include <linux/smp_lock.h> - -#define CREATE_TRACE_POINTS -#include <trace/events/bkl.h> - -/* - * The 'big kernel lock' - * - * This spinlock is taken and released recursively by lock_kernel() - * and unlock_kernel(). It is transparently dropped and reacquired - * over schedule(). It is used to protect legacy code that hasn't - * been migrated to a proper locking design yet. - * - * Don't use in new code. - */ -static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag); - - -/* - * Acquire/release the underlying lock from the scheduler. - * - * This is called with preemption disabled, and should - * return an error value if it cannot get the lock and - * TIF_NEED_RESCHED gets set. - * - * If it successfully gets the lock, it should increment - * the preemption count like any spinlock does. - * - * (This works on UP too - do_raw_spin_trylock will never - * return false in that case) - */ -int __lockfunc __reacquire_kernel_lock(void) -{ - while (!do_raw_spin_trylock(&kernel_flag)) { - if (need_resched()) - return -EAGAIN; - cpu_relax(); - } - preempt_disable(); - return 0; -} - -void __lockfunc __release_kernel_lock(void) -{ - do_raw_spin_unlock(&kernel_flag); - preempt_enable_no_resched(); -} - -/* - * These are the BKL spinlocks - we try to be polite about preemption. - * If SMP is not on (ie UP preemption), this all goes away because the - * do_raw_spin_trylock() will always succeed. - */ -#ifdef CONFIG_PREEMPT -static inline void __lock_kernel(void) -{ - preempt_disable(); - if (unlikely(!do_raw_spin_trylock(&kernel_flag))) { - /* - * If preemption was disabled even before this - * was called, there's nothing we can be polite - * about - just spin. - */ - if (preempt_count() > 1) { - do_raw_spin_lock(&kernel_flag); - return; - } - - /* - * Otherwise, let's wait for the kernel lock - * with preemption enabled.. 
- */ - do { - preempt_enable(); - while (raw_spin_is_locked(&kernel_flag)) - cpu_relax(); - preempt_disable(); - } while (!do_raw_spin_trylock(&kernel_flag)); - } -} - -#else - -/* - * Non-preemption case - just get the spinlock - */ -static inline void __lock_kernel(void) -{ - do_raw_spin_lock(&kernel_flag); -} -#endif - -static inline void __unlock_kernel(void) -{ - /* - * the BKL is not covered by lockdep, so we open-code the - * unlocking sequence (and thus avoid the dep-chain ops): - */ - do_raw_spin_unlock(&kernel_flag); - preempt_enable(); -} - -/* - * Getting the big kernel lock. - * - * This cannot happen asynchronously, so we only need to - * worry about other CPU's. - */ -void __lockfunc _lock_kernel(const char *func, const char *file, int line) -{ - int depth = current->lock_depth + 1; - - trace_lock_kernel(func, file, line); - - if (likely(!depth)) { - might_sleep(); - __lock_kernel(); - } - current->lock_depth = depth; -} - -void __lockfunc _unlock_kernel(const char *func, const char *file, int line) -{ - BUG_ON(current->lock_depth < 0); - if (likely(--current->lock_depth < 0)) - __unlock_kernel(); - - trace_unlock_kernel(func, file, line); -} - -EXPORT_SYMBOL(_lock_kernel); -EXPORT_SYMBOL(_unlock_kernel); - diff --git a/lib/kref.c b/lib/kref.c index d3d227a08a4b..3efb882b11db 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } + +/** + * kref_sub - subtract a number of refcounts for object. + * @kref: object. + * @count: Number of recounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); +EXPORT_SYMBOL(kref_sub); diff --git a/lib/kstrtox.c b/lib/kstrtox.c new file mode 100644 index 000000000000..a235f3cc471c --- /dev/null +++ b/lib/kstrtox.c @@ -0,0 +1,224 @@ +/* + * Convert integer string representation to an integer. + * If an integer doesn't fit into specified type, -E is returned. + * + * Integer starts with optional sign. + * kstrtou*() functions do not accept sign "-". + * + * Radix 0 means autodetection: leading "0x" implies radix 16, + * leading "0" implies radix 8, otherwise radix is 10. + * Autodetection hints work after optional sign, but not before. + * + * If -E is returned, result is not touched. 
+ */ +#include <linux/ctype.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/types.h> + +static inline char _tolower(const char c) +{ + return c | 0x20; +} + +static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + unsigned long long acc; + int ok; + + if (base == 0) { + if (s[0] == '0') { + if (_tolower(s[1]) == 'x' && isxdigit(s[2])) + base = 16; + else + base = 8; + } else + base = 10; + } + if (base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') + s += 2; + + acc = 0; + ok = 0; + while (*s) { + unsigned int val; + + if ('0' <= *s && *s <= '9') + val = *s - '0'; + else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f') + val = _tolower(*s) - 'a' + 10; + else if (*s == '\n' && *(s + 1) == '\0') + break; + else + return -EINVAL; + + if (val >= base) + return -EINVAL; + if (acc > div_u64(ULLONG_MAX - val, base)) + return -ERANGE; + acc = acc * base + val; + ok = 1; + + s++; + } + if (!ok) + return -EINVAL; + *res = acc; + return 0; +} + +int kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + if (s[0] == '+') + s++; + return _kstrtoull(s, base, res); +} +EXPORT_SYMBOL(kstrtoull); + +int kstrtoll(const char *s, unsigned int base, long long *res) +{ + unsigned long long tmp; + int rv; + + if (s[0] == '-') { + rv = _kstrtoull(s + 1, base, &tmp); + if (rv < 0) + return rv; + if ((long long)(-tmp) >= 0) + return -ERANGE; + *res = -tmp; + } else { + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if ((long long)tmp < 0) + return -ERANGE; + *res = tmp; + } + return 0; +} +EXPORT_SYMBOL(kstrtoll); + +/* Internal, do not use. */ +int _kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(unsigned long)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(_kstrtoul); + +/* Internal, do not use. 
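Two guards recur throughout the kstrto*() family above: the accumulator is compared against (ULLONG_MAX - val) / base before each multiply-add so it can never wrap, and the narrower variants cast the result down and compare it with the original to reject values that do not fit. A small self-contained illustration of both checks, using a decimal-only parser and arbitrary sample strings:

#include <errno.h>
#include <limits.h>
#include <stdio.h>

/* Decimal-only version of the accumulate-with-overflow-guard loop. */
static int parse_u64(const char *s, unsigned long long *res)
{
	unsigned long long acc = 0;
	int ok = 0;

	for (; *s; s++) {
		unsigned int val;

		if (*s < '0' || *s > '9')
			return -EINVAL;
		val = (unsigned int)(*s - '0');
		if (acc > (ULLONG_MAX - val) / 10)
			return -ERANGE;		/* next step would wrap */
		acc = acc * 10 + val;
		ok = 1;
	}
	if (!ok)
		return -EINVAL;
	*res = acc;
	return 0;
}

/* The cast-down-and-compare trick used by kstrtou16() and friends. */
static int parse_u16(const char *s, unsigned short *res)
{
	unsigned long long tmp;
	int rv = parse_u64(s, &tmp);

	if (rv < 0)
		return rv;
	if (tmp != (unsigned long long)(unsigned short)tmp)
		return -ERANGE;			/* does not fit in 16 bits */
	*res = (unsigned short)tmp;
	return 0;
}

int main(void)
{
	unsigned short v;

	printf("\"65535\" -> %d\n", parse_u16("65535", &v));	/* 0, fits    */
	printf("\"65536\" -> %d\n", parse_u16("65536", &v));	/* -ERANGE    */
	return 0;
}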
*/ +int _kstrtol(const char *s, unsigned int base, long *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(long)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(_kstrtol); + +int kstrtouint(const char *s, unsigned int base, unsigned int *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(unsigned int)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtouint); + +int kstrtoint(const char *s, unsigned int base, int *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(int)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtoint); + +int kstrtou16(const char *s, unsigned int base, u16 *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(u16)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtou16); + +int kstrtos16(const char *s, unsigned int base, s16 *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(s16)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtos16); + +int kstrtou8(const char *s, unsigned int base, u8 *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(u8)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtou8); + +int kstrtos8(const char *s, unsigned int base, s8 *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(s8)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtos8); diff --git a/lib/list_debug.c b/lib/list_debug.c index 344c710d16ca..b8029a5583ff 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -35,6 +35,31 @@ void __list_add(struct list_head *new, } EXPORT_SYMBOL(__list_add); +void __list_del_entry(struct list_head *entry) +{ + struct list_head *prev, *next; + + prev = entry->prev; + next = entry->next; + + if (WARN(next == LIST_POISON1, + "list_del corruption, %p->next is LIST_POISON1 (%p)\n", + entry, LIST_POISON1) || + WARN(prev == LIST_POISON2, + "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", + entry, LIST_POISON2) || + WARN(prev->next != entry, + "list_del corruption. prev->next should be %p, " + "but was %p\n", entry, prev->next) || + WARN(next->prev != entry, + "list_del corruption. next->prev should be %p, " + "but was %p\n", entry, next->prev)) + return; + + __list_del(prev, next); +} +EXPORT_SYMBOL(__list_del_entry); + /** * list_del - deletes entry from list. * @entry: the element to delete from the list. @@ -43,19 +68,7 @@ EXPORT_SYMBOL(__list_add); */ void list_del(struct list_head *entry) { - WARN(entry->next == LIST_POISON1, - "list_del corruption, next is LIST_POISON1 (%p)\n", - LIST_POISON1); - WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2, - "list_del corruption, prev is LIST_POISON2 (%p)\n", - LIST_POISON2); - WARN(entry->prev->next != entry, - "list_del corruption. prev->next should be %p, " - "but was %p\n", entry, entry->prev->next); - WARN(entry->next->prev != entry, - "list_del corruption. 
next->prev should be %p, " - "but was %p\n", entry, entry->next->prev); - __list_del(entry->prev, entry->next); + __list_del_entry(entry); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } diff --git a/lib/nlattr.c b/lib/nlattr.c index c4706eb98d3d..ac09f2226dc7 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -15,7 +15,7 @@ #include <linux/types.h> #include <net/netlink.h> -static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { +static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), @@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_NESTED] = NLA_HDRLEN, }; -static int validate_nla(struct nlattr *nla, int maxtype, +static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy) { const struct nla_policy *pt; @@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype, * * Returns 0 on success or a negative error code. */ -int nla_validate(struct nlattr *head, int len, int maxtype, +int nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy) { - struct nlattr *nla; + const struct nlattr *nla; int rem, err; nla_for_each_attr(nla, head, len, rem) { @@ -148,7 +148,7 @@ nla_policy_len(const struct nla_policy *p, int n) { int i, len = 0; - for (i = 0; i < n; i++) { + for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); else if (nla_attr_minlen[p->type]) @@ -167,16 +167,16 @@ nla_policy_len(const struct nla_policy *p, int n) * @policy: validation policy * * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessable via the attribute type. Attributes with a type + * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be silently ignored for backwards compatibility * reasons. policy may be set to NULL if no validation is required. * * Returns 0 on success or a negative error code. */ -int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - const struct nla_policy *policy) +int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy) { - struct nlattr *nla; + const struct nlattr *nla; int rem, err; memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); @@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, goto errout; } - tb[type] = nla; + tb[type] = (struct nlattr *)nla; } } @@ -212,14 +212,14 @@ errout: * * Returns the first attribute in the stream matching the specified type. */ -struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) { - struct nlattr *nla; + const struct nlattr *nla; int rem; nla_for_each_attr(nla, head, len, rem) if (nla_type(nla) == attrtype) - return nla; + return (struct nlattr *)nla; return NULL; } diff --git a/lib/parser.c b/lib/parser.c index 6e89eca5cca0..dcbaaef6cf11 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -13,7 +13,7 @@ /** * match_one: - Determines if a string matches a simple pattern - * @s: the string to examine for presense of the pattern + * @s: the string to examine for presence of the pattern * @p: the string containing the pattern * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match * locations. 
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 604678d7d06d..28f2c33c6b53 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -72,18 +72,16 @@ EXPORT_SYMBOL(percpu_counter_set); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; - s32 *pcount; preempt_disable(); - pcount = this_cpu_ptr(fbc->counters); - count = *pcount + amount; + count = __this_cpu_read(*fbc->counters) + amount; if (count >= batch || count <= -batch) { spin_lock(&fbc->lock); fbc->count += count; - *pcount = 0; + __this_cpu_write(*fbc->counters, 0); spin_unlock(&fbc->lock); } else { - *pcount = count; + __this_cpu_write(*fbc->counters, count); } preempt_enable(); } diff --git a/lib/plist.c b/lib/plist.c index 1471988d9190..0ae7e6431726 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -28,6 +28,8 @@ #ifdef CONFIG_DEBUG_PI_LIST +static struct plist_head test_head; + static void plist_check_prev_next(struct list_head *t, struct list_head *p, struct list_head *n) { @@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top) static void plist_check_head(struct plist_head *head) { - WARN_ON(!head->rawlock && !head->spinlock); + WARN_ON(head != &test_head && !head->rawlock && !head->spinlock); if (head->rawlock) WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); if (head->spinlock) WARN_ON_SMP(!spin_is_locked(head->spinlock)); - plist_check_list(&head->prio_list); + if (!plist_head_empty(head)) + plist_check_list(&plist_first(head)->prio_list); plist_check_list(&head->node_list); } @@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head) */ void plist_add(struct plist_node *node, struct plist_head *head) { - struct plist_node *iter; + struct plist_node *first, *iter, *prev = NULL; + struct list_head *node_next = &head->node_list; plist_check_head(head); WARN_ON(!plist_node_empty(node)); + WARN_ON(!list_empty(&node->prio_list)); + + if (plist_head_empty(head)) + goto ins_node; - list_for_each_entry(iter, &head->prio_list, plist.prio_list) { - if (node->prio < iter->prio) - goto lt_prio; - else if (node->prio == iter->prio) { - iter = list_entry(iter->plist.prio_list.next, - struct plist_node, plist.prio_list); - goto eq_prio; + first = iter = plist_first(head); + + do { + if (node->prio < iter->prio) { + node_next = &iter->node_list; + break; } - } -lt_prio: - list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); -eq_prio: - list_add_tail(&node->plist.node_list, &iter->plist.node_list); + prev = iter; + iter = list_entry(iter->prio_list.next, + struct plist_node, prio_list); + } while (iter != first); + + if (!prev || prev->prio != node->prio) + list_add_tail(&node->prio_list, &iter->prio_list); +ins_node: + list_add_tail(&node->node_list, node_next); plist_check_head(head); } @@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head) { plist_check_head(head); - if (!list_empty(&node->plist.prio_list)) { - struct plist_node *next = plist_first(&node->plist); + if (!list_empty(&node->prio_list)) { + if (node->node_list.next != &head->node_list) { + struct plist_node *next; + + next = list_entry(node->node_list.next, + struct plist_node, node_list); - list_move_tail(&next->plist.prio_list, &node->plist.prio_list); - list_del_init(&node->plist.prio_list); + /* add the next plist_node into prio_list */ + if (list_empty(&next->prio_list)) + list_add(&next->prio_list, &node->prio_list); + } + list_del_init(&node->prio_list); } - list_del_init(&node->plist.node_list); + list_del_init(&node->node_list); 
plist_check_head(head); } + +#ifdef CONFIG_DEBUG_PI_LIST +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/init.h> + +static struct plist_node __initdata test_node[241]; + +static void __init plist_test_check(int nr_expect) +{ + struct plist_node *first, *prio_pos, *node_pos; + + if (plist_head_empty(&test_head)) { + BUG_ON(nr_expect != 0); + return; + } + + prio_pos = first = plist_first(&test_head); + plist_for_each(node_pos, &test_head) { + if (nr_expect-- < 0) + break; + if (node_pos == first) + continue; + if (node_pos->prio == prio_pos->prio) { + BUG_ON(!list_empty(&node_pos->prio_list)); + continue; + } + + BUG_ON(prio_pos->prio > node_pos->prio); + BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list); + prio_pos = node_pos; + } + + BUG_ON(nr_expect != 0); + BUG_ON(prio_pos->prio_list.next != &first->prio_list); +} + +static int __init plist_test(void) +{ + int nr_expect = 0, i, loop; + unsigned int r = local_clock(); + + printk(KERN_INFO "start plist test\n"); + plist_head_init(&test_head, NULL); + for (i = 0; i < ARRAY_SIZE(test_node); i++) + plist_node_init(test_node + i, 0); + + for (loop = 0; loop < 1000; loop++) { + r = r * 193939 % 47629; + i = r % ARRAY_SIZE(test_node); + if (plist_node_empty(test_node + i)) { + r = r * 193939 % 47629; + test_node[i].prio = r % 99; + plist_add(test_node + i, &test_head); + nr_expect++; + } else { + plist_del(test_node + i, &test_head); + nr_expect--; + } + plist_test_check(nr_expect); + } + + for (i = 0; i < ARRAY_SIZE(test_node); i++) { + if (plist_node_empty(test_node + i)) + continue; + plist_del(test_node + i, &test_head); + nr_expect--; + plist_test_check(nr_expect); + } + + printk(KERN_INFO "end plist test\n"); + return 0; +} + +module_init(plist_test); + +#endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 5086bb962b4d..7ea2e033d715 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -736,10 +736,11 @@ next: } } /* - * The iftag must have been set somewhere because otherwise - * we would return immediated at the beginning of the function + * We need not to tag the root tag if there is no tag which is set with + * settag within the range from *first_indexp to last_index. */ - root_tag_set(root, settag); + if (tagged > 0) + root_tag_set(root, settag); *first_indexp = index; return tagged; diff --git a/lib/rbtree.c b/lib/rbtree.c index 4693f79195d3..a16be19a1305 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -315,6 +315,7 @@ void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) rb_augment_path(node, func, data); } +EXPORT_SYMBOL(rb_augment_insert); /* * before removing the node, find the deepest node on the rebalance path @@ -340,6 +341,7 @@ struct rb_node *rb_augment_erase_begin(struct rb_node *node) return deepest; } +EXPORT_SYMBOL(rb_augment_erase_begin); /* * after removal, update the tree to account for the removed entry @@ -350,6 +352,7 @@ void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) if (node) rb_augment_path(node, func, data); } +EXPORT_SYMBOL(rb_augment_erase_end); /* * This function returns the first node (in sort order) of the tree. 
diff --git a/lib/rwsem.c b/lib/rwsem.c index f236d7cd5cf3..aa7c3052261f 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* * wait for the read lock to be granted */ -asmregparm struct rw_semaphore __sched * -rwsem_down_read_failed(struct rw_semaphore *sem) +struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, -RWSEM_ACTIVE_READ_BIAS); @@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem) /* * wait for the write lock to be granted */ -asmregparm struct rw_semaphore __sched * -rwsem_down_write_failed(struct rw_semaphore *sem) +struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, -RWSEM_ACTIVE_WRITE_BIAS); @@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem) * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here */ -asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) +struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; @@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) * - caller incremented waiting part of count and discovered it still negative * - just wake up any readers at the front of the queue */ -asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) +struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; diff --git a/lib/show_mem.c b/lib/show_mem.c index fdc77c82f922..90cbe4bb5960 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -9,14 +9,14 @@ #include <linux/nmi.h> #include <linux/quicklist.h> -void show_mem(void) +void show_mem(unsigned int filter) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, shared = 0, nonshared = 0, highmem = 0; printk("Mem-Info:\n"); - show_free_areas(); + __show_free_areas(filter); for_each_online_pgdat(pgdat) { unsigned long i, flags; diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 7c06ee51a29a..93ca08b8a451 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -60,7 +60,7 @@ int swiotlb_force; static char *io_tlb_start, *io_tlb_end; /* - * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and + * The number of IO TLB blocks (in groups of 64) between io_tlb_start and * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. 
*/ static unsigned long io_tlb_nslabs; @@ -686,8 +686,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, /* * Ensure that the address returned is DMA'ble */ - if (!dma_capable(dev, dev_addr, size)) - panic("map_single: bounce buffer is not DMA'ble"); + if (!dma_capable(dev, dev_addr, size)) { + swiotlb_tbl_unmap_single(dev, map, size, dir); + dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer); + } return dev_addr; } diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c new file mode 100644 index 000000000000..d55769d63cb8 --- /dev/null +++ b/lib/test-kstrtox.c @@ -0,0 +1,739 @@ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#define for_each_test(i, test) \ + for (i = 0; i < sizeof(test) / sizeof(test[0]); i++) + +struct test_fail { + const char *str; + unsigned int base; +}; + +#define DEFINE_TEST_FAIL(test) \ + const struct test_fail test[] __initdata + +#define DECLARE_TEST_OK(type, test_type) \ + test_type { \ + const char *str; \ + unsigned int base; \ + type expected_res; \ + } + +#define DEFINE_TEST_OK(type, test) \ + const type test[] __initdata + +#define TEST_FAIL(fn, type, fmt, test) \ +{ \ + unsigned int i; \ + \ + for_each_test(i, test) { \ + const struct test_fail *t = &test[i]; \ + type tmp; \ + int rv; \ + \ + tmp = 0; \ + rv = fn(t->str, t->base, &tmp); \ + if (rv >= 0) { \ + WARN(1, "str '%s', base %u, expected -E, got %d/" fmt "\n", \ + t->str, t->base, rv, tmp); \ + continue; \ + } \ + } \ +} + +#define TEST_OK(fn, type, fmt, test) \ +{ \ + unsigned int i; \ + \ + for_each_test(i, test) { \ + const typeof(test[0]) *t = &test[i]; \ + type res; \ + int rv; \ + \ + rv = fn(t->str, t->base, &res); \ + if (rv != 0) { \ + WARN(1, "str '%s', base %u, expected 0/" fmt ", got %d\n", \ + t->str, t->base, t->expected_res, rv); \ + continue; \ + } \ + if (res != t->expected_res) { \ + WARN(1, "str '%s', base %u, expected " fmt ", got " fmt "\n", \ + t->str, t->base, t->expected_res, res); \ + continue; \ + } \ + } \ +} + +static void __init test_kstrtoull_ok(void) +{ + DECLARE_TEST_OK(unsigned long long, struct test_ull); + static DEFINE_TEST_OK(struct test_ull, test_ull_ok) = { + {"0", 10, 0ULL}, + {"1", 10, 1ULL}, + {"127", 10, 127ULL}, + {"128", 10, 128ULL}, + {"129", 10, 129ULL}, + {"255", 10, 255ULL}, + {"256", 10, 256ULL}, + {"257", 10, 257ULL}, + {"32767", 10, 32767ULL}, + {"32768", 10, 32768ULL}, + {"32769", 10, 32769ULL}, + {"65535", 10, 65535ULL}, + {"65536", 10, 65536ULL}, + {"65537", 10, 65537ULL}, + {"2147483647", 10, 2147483647ULL}, + {"2147483648", 10, 2147483648ULL}, + {"2147483649", 10, 2147483649ULL}, + {"4294967295", 10, 4294967295ULL}, + {"4294967296", 10, 4294967296ULL}, + {"4294967297", 10, 4294967297ULL}, + {"9223372036854775807", 10, 9223372036854775807ULL}, + {"9223372036854775808", 10, 9223372036854775808ULL}, + {"9223372036854775809", 10, 9223372036854775809ULL}, + {"18446744073709551614", 10, 18446744073709551614ULL}, + {"18446744073709551615", 10, 18446744073709551615ULL}, + + {"00", 8, 00ULL}, + {"01", 8, 01ULL}, + {"0177", 8, 0177ULL}, + {"0200", 8, 0200ULL}, + {"0201", 8, 0201ULL}, + {"0377", 8, 0377ULL}, + {"0400", 8, 0400ULL}, + {"0401", 8, 0401ULL}, + {"077777", 8, 077777ULL}, + {"0100000", 8, 0100000ULL}, + {"0100001", 8, 0100001ULL}, + {"0177777", 8, 0177777ULL}, + {"0200000", 8, 0200000ULL}, + {"0200001", 8, 0200001ULL}, + {"017777777777", 8, 017777777777ULL}, + {"020000000000", 8, 020000000000ULL}, + {"020000000001", 8, 020000000001ULL}, + {"037777777777", 8, 037777777777ULL}, + 
{"040000000000", 8, 040000000000ULL}, + {"040000000001", 8, 040000000001ULL}, + {"0777777777777777777777", 8, 0777777777777777777777ULL}, + {"01000000000000000000000", 8, 01000000000000000000000ULL}, + {"01000000000000000000001", 8, 01000000000000000000001ULL}, + {"01777777777777777777776", 8, 01777777777777777777776ULL}, + {"01777777777777777777777", 8, 01777777777777777777777ULL}, + + {"0x0", 16, 0x0ULL}, + {"0x1", 16, 0x1ULL}, + {"0x7f", 16, 0x7fULL}, + {"0x80", 16, 0x80ULL}, + {"0x81", 16, 0x81ULL}, + {"0xff", 16, 0xffULL}, + {"0x100", 16, 0x100ULL}, + {"0x101", 16, 0x101ULL}, + {"0x7fff", 16, 0x7fffULL}, + {"0x8000", 16, 0x8000ULL}, + {"0x8001", 16, 0x8001ULL}, + {"0xffff", 16, 0xffffULL}, + {"0x10000", 16, 0x10000ULL}, + {"0x10001", 16, 0x10001ULL}, + {"0x7fffffff", 16, 0x7fffffffULL}, + {"0x80000000", 16, 0x80000000ULL}, + {"0x80000001", 16, 0x80000001ULL}, + {"0xffffffff", 16, 0xffffffffULL}, + {"0x100000000", 16, 0x100000000ULL}, + {"0x100000001", 16, 0x100000001ULL}, + {"0x7fffffffffffffff", 16, 0x7fffffffffffffffULL}, + {"0x8000000000000000", 16, 0x8000000000000000ULL}, + {"0x8000000000000001", 16, 0x8000000000000001ULL}, + {"0xfffffffffffffffe", 16, 0xfffffffffffffffeULL}, + {"0xffffffffffffffff", 16, 0xffffffffffffffffULL}, + + {"0\n", 0, 0ULL}, + }; + TEST_OK(kstrtoull, unsigned long long, "%llu", test_ull_ok); +} + +static void __init test_kstrtoull_fail(void) +{ + static DEFINE_TEST_FAIL(test_ull_fail) = { + {"", 0}, + {"", 8}, + {"", 10}, + {"", 16}, + {"\n", 0}, + {"\n", 8}, + {"\n", 10}, + {"\n", 16}, + {"\n0", 0}, + {"\n0", 8}, + {"\n0", 10}, + {"\n0", 16}, + {"+", 0}, + {"+", 8}, + {"+", 10}, + {"+", 16}, + {"-", 0}, + {"-", 8}, + {"-", 10}, + {"-", 16}, + {"0x", 0}, + {"0x", 16}, + {"0X", 0}, + {"0X", 16}, + {"0 ", 0}, + {"1+", 0}, + {"1-", 0}, + {" 2", 0}, + /* base autodetection */ + {"0x0z", 0}, + {"0z", 0}, + {"a", 0}, + /* digit >= base */ + {"2", 2}, + {"8", 8}, + {"a", 10}, + {"A", 10}, + {"g", 16}, + {"G", 16}, + /* overflow */ + {"10000000000000000000000000000000000000000000000000000000000000000", 2}, + {"2000000000000000000000", 8}, + {"18446744073709551616", 10}, + {"10000000000000000", 16}, + /* negative */ + {"-0", 0}, + {"-0", 8}, + {"-0", 10}, + {"-0", 16}, + {"-1", 0}, + {"-1", 8}, + {"-1", 10}, + {"-1", 16}, + /* sign is first character if any */ + {"-+1", 0}, + {"-+1", 8}, + {"-+1", 10}, + {"-+1", 16}, + /* nothing after \n */ + {"0\n0", 0}, + {"0\n0", 8}, + {"0\n0", 10}, + {"0\n0", 16}, + {"0\n+", 0}, + {"0\n+", 8}, + {"0\n+", 10}, + {"0\n+", 16}, + {"0\n-", 0}, + {"0\n-", 8}, + {"0\n-", 10}, + {"0\n-", 16}, + {"0\n ", 0}, + {"0\n ", 8}, + {"0\n ", 10}, + {"0\n ", 16}, + }; + TEST_FAIL(kstrtoull, unsigned long long, "%llu", test_ull_fail); +} + +static void __init test_kstrtoll_ok(void) +{ + DECLARE_TEST_OK(long long, struct test_ll); + static DEFINE_TEST_OK(struct test_ll, test_ll_ok) = { + {"0", 10, 0LL}, + {"1", 10, 1LL}, + {"127", 10, 127LL}, + {"128", 10, 128LL}, + {"129", 10, 129LL}, + {"255", 10, 255LL}, + {"256", 10, 256LL}, + {"257", 10, 257LL}, + {"32767", 10, 32767LL}, + {"32768", 10, 32768LL}, + {"32769", 10, 32769LL}, + {"65535", 10, 65535LL}, + {"65536", 10, 65536LL}, + {"65537", 10, 65537LL}, + {"2147483647", 10, 2147483647LL}, + {"2147483648", 10, 2147483648LL}, + {"2147483649", 10, 2147483649LL}, + {"4294967295", 10, 4294967295LL}, + {"4294967296", 10, 4294967296LL}, + {"4294967297", 10, 4294967297LL}, + {"9223372036854775807", 10, 9223372036854775807LL}, + + {"-1", 10, -1LL}, + {"-2", 10, -2LL}, + {"-9223372036854775808", 10, 
LLONG_MIN}, + }; + TEST_OK(kstrtoll, long long, "%lld", test_ll_ok); +} + +static void __init test_kstrtoll_fail(void) +{ + static DEFINE_TEST_FAIL(test_ll_fail) = { + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"-9223372036854775809", 10}, + {"-18446744073709551614", 10}, + {"-18446744073709551615", 10}, + /* negative zero isn't an integer in Linux */ + {"-0", 0}, + {"-0", 8}, + {"-0", 10}, + {"-0", 16}, + /* sign is first character if any */ + {"-+1", 0}, + {"-+1", 8}, + {"-+1", 10}, + {"-+1", 16}, + }; + TEST_FAIL(kstrtoll, long long, "%lld", test_ll_fail); +} + +static void __init test_kstrtou64_ok(void) +{ + DECLARE_TEST_OK(u64, struct test_u64); + static DEFINE_TEST_OK(struct test_u64, test_u64_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + {"2147483648", 10, 2147483648ULL}, + {"2147483649", 10, 2147483649ULL}, + {"4294967294", 10, 4294967294ULL}, + {"4294967295", 10, 4294967295ULL}, + {"4294967296", 10, 4294967296ULL}, + {"4294967297", 10, 4294967297ULL}, + {"9223372036854775806", 10, 9223372036854775806ULL}, + {"9223372036854775807", 10, 9223372036854775807ULL}, + {"9223372036854775808", 10, 9223372036854775808ULL}, + {"9223372036854775809", 10, 9223372036854775809ULL}, + {"18446744073709551614", 10, 18446744073709551614ULL}, + {"18446744073709551615", 10, 18446744073709551615ULL}, + }; + TEST_OK(kstrtou64, u64, "%llu", test_u64_ok); +} + +static void __init test_kstrtou64_fail(void) +{ + static DEFINE_TEST_FAIL(test_u64_fail) = { + {"-2", 10}, + {"-1", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou64, u64, "%llu", test_u64_fail); +} + +static void __init test_kstrtos64_ok(void) +{ + DECLARE_TEST_OK(s64, struct test_s64); + static DEFINE_TEST_OK(struct test_s64, test_s64_ok) = { + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + {"2147483648", 10, 2147483648LL}, + {"2147483649", 10, 2147483649LL}, + {"4294967294", 10, 4294967294LL}, + {"4294967295", 10, 4294967295LL}, + {"4294967296", 10, 4294967296LL}, + {"4294967297", 10, 4294967297LL}, + {"9223372036854775806", 10, 9223372036854775806LL}, + {"9223372036854775807", 10, 9223372036854775807LL}, + }; + TEST_OK(kstrtos64, s64, "%lld", test_s64_ok); +} + +static void __init test_kstrtos64_fail(void) +{ + static DEFINE_TEST_FAIL(test_s64_fail) = { + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos64, s64, "%lld", test_s64_fail); +} + +static void __init test_kstrtou32_ok(void) +{ + 
DECLARE_TEST_OK(u32, struct test_u32); + static DEFINE_TEST_OK(struct test_u32, test_u32_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + {"2147483648", 10, 2147483648U}, + {"2147483649", 10, 2147483649U}, + {"4294967294", 10, 4294967294U}, + {"4294967295", 10, 4294967295U}, + }; + TEST_OK(kstrtou32, u32, "%u", test_u32_ok); +} + +static void __init test_kstrtou32_fail(void) +{ + static DEFINE_TEST_FAIL(test_u32_fail) = { + {"-2", 10}, + {"-1", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou32, u32, "%u", test_u32_fail); +} + +static void __init test_kstrtos32_ok(void) +{ + DECLARE_TEST_OK(s32, struct test_s32); + static DEFINE_TEST_OK(struct test_s32, test_s32_ok) = { + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + }; + TEST_OK(kstrtos32, s32, "%d", test_s32_ok); +} + +static void __init test_kstrtos32_fail(void) +{ + static DEFINE_TEST_FAIL(test_s32_fail) = { + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos32, s32, "%d", test_s32_fail); +} + +static void __init test_kstrtou16_ok(void) +{ + DECLARE_TEST_OK(u16, struct test_u16); + static DEFINE_TEST_OK(struct test_u16, test_u16_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + }; + TEST_OK(kstrtou16, u16, "%hu", test_u16_ok); +} + +static void __init test_kstrtou16_fail(void) +{ + static DEFINE_TEST_FAIL(test_u16_fail) = { + {"-2", 10}, + {"-1", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + 
{"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou16, u16, "%hu", test_u16_fail); +} + +static void __init test_kstrtos16_ok(void) +{ + DECLARE_TEST_OK(s16, struct test_s16); + static DEFINE_TEST_OK(struct test_s16, test_s16_ok) = { + {"-130", 10, -130}, + {"-129", 10, -129}, + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + }; + TEST_OK(kstrtos16, s16, "%hd", test_s16_ok); +} + +static void __init test_kstrtos16_fail(void) +{ + static DEFINE_TEST_FAIL(test_s16_fail) = { + {"32768", 10}, + {"32769", 10}, + {"65534", 10}, + {"65535", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos16, s16, "%hd", test_s16_fail); +} + +static void __init test_kstrtou8_ok(void) +{ + DECLARE_TEST_OK(u8, struct test_u8); + static DEFINE_TEST_OK(struct test_u8, test_u8_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + }; + TEST_OK(kstrtou8, u8, "%hhu", test_u8_ok); +} + +static void __init test_kstrtou8_fail(void) +{ + static DEFINE_TEST_FAIL(test_u8_fail) = { + {"-2", 10}, + {"-1", 10}, + {"256", 10}, + {"257", 10}, + {"32766", 10}, + {"32767", 10}, + {"32768", 10}, + {"32769", 10}, + {"65534", 10}, + {"65535", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou8, u8, "%hhu", test_u8_fail); +} + +static void __init test_kstrtos8_ok(void) +{ + DECLARE_TEST_OK(s8, struct test_s8); + static DEFINE_TEST_OK(struct test_s8, test_s8_ok) = { + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + }; + TEST_OK(kstrtos8, s8, "%hhd", test_s8_ok); +} + +static void __init test_kstrtos8_fail(void) +{ + static DEFINE_TEST_FAIL(test_s8_fail) = { + {"-130", 10}, + {"-129", 10}, + {"128", 10}, + {"129", 10}, + {"254", 10}, + {"255", 10}, + {"256", 10}, + {"257", 10}, + {"32766", 10}, + {"32767", 10}, + {"32768", 10}, + {"32769", 10}, + {"65534", 10}, + {"65535", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, 
+ {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos8, s8, "%hhd", test_s8_fail); +} + +static int __init test_kstrtox_init(void) +{ + test_kstrtoull_ok(); + test_kstrtoull_fail(); + test_kstrtoll_ok(); + test_kstrtoll_fail(); + + test_kstrtou64_ok(); + test_kstrtou64_fail(); + test_kstrtos64_ok(); + test_kstrtos64_fail(); + + test_kstrtou32_ok(); + test_kstrtou32_fail(); + test_kstrtos32_ok(); + test_kstrtos32_fail(); + + test_kstrtou16_ok(); + test_kstrtou16_fail(); + test_kstrtos16_ok(); + test_kstrtos16_fail(); + + test_kstrtou8_ok(); + test_kstrtou8_fail(); + test_kstrtos8_ok(); + test_kstrtos8_fail(); + return -EINVAL; +} +module_init(test_kstrtox_init); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/textsearch.c b/lib/textsearch.c index d608331b3e47..e0cc0146ae62 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -13,7 +13,7 @@ * * INTRODUCTION * - * The textsearch infrastructure provides text searching facitilies for + * The textsearch infrastructure provides text searching facilities for * both linear and non-linear data. Individual search algorithms are * implemented in modules and chosen by the user. * @@ -43,7 +43,7 @@ * to the algorithm to store persistent variables. * (4) Core eventually resets the search offset and forwards the find() * request to the algorithm. - * (5) Algorithm calls get_next_block() provided by the user continously + * (5) Algorithm calls get_next_block() provided by the user continuously * to fetch the data to be searched in block by block. * (6) Algorithm invokes finish() after the last call to get_next_block * to clean up any leftovers from get_next_block. (Optional) @@ -58,15 +58,15 @@ * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE * to perform case insensitive matching. But it might slow down * performance of algorithm, so you should use it at own your risk. - * The returned configuration may then be used for an arbitary + * The returned configuration may then be used for an arbitrary * amount of times and even in parallel as long as a separate struct * ts_state variable is provided to every instance. * * The actual search is performed by either calling textsearch_find_- * continuous() for linear data or by providing an own get_next_block() * implementation and calling textsearch_find(). Both functions return - * the position of the first occurrence of the patern or UINT_MAX if - * no match was found. Subsequent occurences can be found by calling + * the position of the first occurrence of the pattern or UINT_MAX if + * no match was found. Subsequent occurrences can be found by calling * textsearch_next() regardless of the linearity of the data. * * Once you're done using a configuration it must be given back via diff --git a/lib/timerqueue.c b/lib/timerqueue.c new file mode 100644 index 000000000000..191176a43e9a --- /dev/null +++ b/lib/timerqueue.c @@ -0,0 +1,107 @@ +/* + * Generic Timer-queue + * + * Manages a simple queue of timers, ordered by expiration time. + * Uses rbtrees for quick list adds and expiration. + * + * NOTE: All of the following functions need to be serialized + * to avoid races. No locking is done by this library code. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
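The tables above pin down the contract of the new kstrto*() parsers: a single trailing newline is tolerated (so sysfs writes coming from echo work), base 0 auto-detects 0x/0 prefixes, and overflow or trailing garbage is rejected with a negative error instead of being silently truncated the way simple_strtoul() would. A minimal caller sketch (not part of the patch); the function name parse_copybreak and the 4096 bound are made up for illustration:

#include <linux/kernel.h>

/* Parse a decimal sysfs write such as "1024\n" into a u32, rejecting junk. */
static int parse_copybreak(const char *buf, u32 *val)
{
	int ret;

	ret = kstrtou32(buf, 10, val);	/* 0 on success, -EINVAL or -ERANGE otherwise */
	if (ret)
		return ret;
	if (*val > 4096)		/* made-up driver-specific upper bound */
		return -EINVAL;
	return 0;
}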
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/timerqueue.h> +#include <linux/rbtree.h> +#include <linux/module.h> + +/** + * timerqueue_add - Adds timer to timerqueue. + * + * @head: head of timerqueue + * @node: timer node to be added + * + * Adds the timer node to the timerqueue, sorted by the + * node's expires value. + */ +void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) +{ + struct rb_node **p = &head->head.rb_node; + struct rb_node *parent = NULL; + struct timerqueue_node *ptr; + + /* Make sure we don't add nodes that are already added */ + WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); + + while (*p) { + parent = *p; + ptr = rb_entry(parent, struct timerqueue_node, node); + if (node->expires.tv64 < ptr->expires.tv64) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->node, parent, p); + rb_insert_color(&node->node, &head->head); + + if (!head->next || node->expires.tv64 < head->next->expires.tv64) + head->next = node; +} +EXPORT_SYMBOL_GPL(timerqueue_add); + +/** + * timerqueue_del - Removes a timer from the timerqueue. + * + * @head: head of timerqueue + * @node: timer node to be removed + * + * Removes the timer node from the timerqueue. + */ +void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) +{ + WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); + + /* update next pointer */ + if (head->next == node) { + struct rb_node *rbn = rb_next(&node->node); + + head->next = rbn ? + rb_entry(rbn, struct timerqueue_node, node) : NULL; + } + rb_erase(&node->node, &head->head); + RB_CLEAR_NODE(&node->node); +} +EXPORT_SYMBOL_GPL(timerqueue_del); + +/** + * timerqueue_iterate_next - Returns the timer after the provided timer + * + * @node: Pointer to a timer. + * + * Provides the timer that is after the given node. This is used, when + * necessary, to iterate through the list of timers in a timer list + * without modifying the list. 
+ */ +struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node) +{ + struct rb_node *next; + + if (!node) + return NULL; + next = rb_next(&node->node); + if (!next) + return NULL; + return container_of(next, struct timerqueue_node, node); +} +EXPORT_SYMBOL_GPL(timerqueue_iterate_next); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c150d3dafff4..bc0ac6b333dc 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -120,147 +120,6 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base) } EXPORT_SYMBOL(simple_strtoll); -/** - * strict_strtoul - convert a string to an unsigned long strictly - * @cp: The string to be converted - * @base: The number base to use - * @res: The converted result value - * - * strict_strtoul converts a string to an unsigned long only if the - * string is really an unsigned long string, any string containing - * any invalid char at the tail will be rejected and -EINVAL is returned, - * only a newline char at the tail is acceptible because people generally - * change a module parameter in the following way: - * - * echo 1024 > /sys/module/e1000/parameters/copybreak - * - * echo will append a newline to the tail. - * - * It returns 0 if conversion is successful and *res is set to the converted - * value, otherwise it returns -EINVAL and *res is set to 0. - * - * simple_strtoul just ignores the successive invalid characters and - * return the converted value of prefix part of the string. - */ -int strict_strtoul(const char *cp, unsigned int base, unsigned long *res) -{ - char *tail; - unsigned long val; - - *res = 0; - if (!*cp) - return -EINVAL; - - val = simple_strtoul(cp, &tail, base); - if (tail == cp) - return -EINVAL; - - if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { - *res = val; - return 0; - } - - return -EINVAL; -} -EXPORT_SYMBOL(strict_strtoul); - -/** - * strict_strtol - convert a string to a long strictly - * @cp: The string to be converted - * @base: The number base to use - * @res: The converted result value - * - * strict_strtol is similiar to strict_strtoul, but it allows the first - * character of a string is '-'. - * - * It returns 0 if conversion is successful and *res is set to the converted - * value, otherwise it returns -EINVAL and *res is set to 0. - */ -int strict_strtol(const char *cp, unsigned int base, long *res) -{ - int ret; - if (*cp == '-') { - ret = strict_strtoul(cp + 1, base, (unsigned long *)res); - if (!ret) - *res = -(*res); - } else { - ret = strict_strtoul(cp, base, (unsigned long *)res); - } - - return ret; -} -EXPORT_SYMBOL(strict_strtol); - -/** - * strict_strtoull - convert a string to an unsigned long long strictly - * @cp: The string to be converted - * @base: The number base to use - * @res: The converted result value - * - * strict_strtoull converts a string to an unsigned long long only if the - * string is really an unsigned long long string, any string containing - * any invalid char at the tail will be rejected and -EINVAL is returned, - * only a newline char at the tail is acceptible because people generally - * change a module parameter in the following way: - * - * echo 1024 > /sys/module/e1000/parameters/copybreak - * - * echo will append a newline to the tail of the string. - * - * It returns 0 if conversion is successful and *res is set to the converted - * value, otherwise it returns -EINVAL and *res is set to 0. - * - * simple_strtoull just ignores the successive invalid characters and - * return the converted value of prefix part of the string. 
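A minimal sketch (not part of the patch) of the timerqueue library added above: the earliest-expiring node is cached in head->next, and the caller provides all serialization. Only timerqueue_add(), timerqueue_del() and timerqueue_iterate_next() appear in this patch; the init and peek helpers used below (timerqueue_init_head(), timerqueue_init(), timerqueue_getnext()) are assumed to be the inline helpers from <linux/timerqueue.h>:

#include <linux/timerqueue.h>
#include <linux/ktime.h>

static struct timerqueue_head queue;
static struct timerqueue_node ev_a, ev_b;

static void timerqueue_sketch(void)
{
	struct timerqueue_node *next;

	timerqueue_init_head(&queue);
	timerqueue_init(&ev_a);
	timerqueue_init(&ev_b);

	ev_a.expires = ktime_set(10, 0);	/* expires at t = 10 s */
	ev_b.expires = ktime_set(5, 0);		/* expires at t = 5 s  */

	/* No locking inside the library; the caller serializes. */
	timerqueue_add(&queue, &ev_a);
	timerqueue_add(&queue, &ev_b);

	next = timerqueue_getnext(&queue);	/* ev_b: earliest expiry, cached in head->next */

	timerqueue_del(&queue, next);
	next = timerqueue_getnext(&queue);	/* now ev_a */
}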
- */ -int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res) -{ - char *tail; - unsigned long long val; - - *res = 0; - if (!*cp) - return -EINVAL; - - val = simple_strtoull(cp, &tail, base); - if (tail == cp) - return -EINVAL; - if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { - *res = val; - return 0; - } - - return -EINVAL; -} -EXPORT_SYMBOL(strict_strtoull); - -/** - * strict_strtoll - convert a string to a long long strictly - * @cp: The string to be converted - * @base: The number base to use - * @res: The converted result value - * - * strict_strtoll is similiar to strict_strtoull, but it allows the first - * character of a string is '-'. - * - * It returns 0 if conversion is successful and *res is set to the converted - * value, otherwise it returns -EINVAL and *res is set to 0. - */ -int strict_strtoll(const char *cp, unsigned int base, long long *res) -{ - int ret; - if (*cp == '-') { - ret = strict_strtoull(cp + 1, base, (unsigned long long *)res); - if (!ret) - *res = -(*res); - } else { - ret = strict_strtoull(cp, base, (unsigned long long *)res); - } - - return ret; -} -EXPORT_SYMBOL(strict_strtoll); - static noinline_for_stack int skip_atoi(const char **s) { @@ -574,7 +433,9 @@ char *symbol_string(char *buf, char *end, void *ptr, unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; - if (ext != 'f' && ext != 's') + if (ext == 'B') + sprint_backtrace(sym, value); + else if (ext != 'f' && ext != 's') sprint_symbol(sym, value); else kallsyms_lookup(value, NULL, NULL, NULL, sym); @@ -936,6 +797,8 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } +int kptr_restrict = 1; + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -947,6 +810,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers with offset * - 's' For symbolic direct pointers without offset + * - 'B' For backtraced symbolic direct pointers with offset * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] * - 'M' For a 6-byte MAC address, it prints the address in the @@ -979,6 +843,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, * Implements a "recursive vsnprintf". * Do not use this feature without some mechanism to verify the * correctness of the format string and va_list arguments. + * - 'K' For a kernel pointer that should be hidden from unprivileged users * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -988,7 +853,7 @@ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { - if (!ptr) { + if (!ptr && *fmt != 'K') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. 
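A minimal sketch (not part of the patch) of the two printk extensions documented above: %pB symbolizes a value as a return address, the way a stack trace does, and %pK shows the real pointer only when kptr_restrict is 0, or is 1 and the task reading the output has CAP_SYSLOG; otherwise it prints zeroes. The seq_file show routine below is hypothetical:

#include <linux/kernel.h>
#include <linux/seq_file.h>

/* Hypothetical debugfs/procfs show routine. */
static int sketch_show(struct seq_file *m, void *v)
{
	/* Symbolic "caller+0x.../0x..." output for a return address. */
	seq_printf(m, "caller: %pB\n", (void *)_RET_IP_);
	/* All zeroes for readers without CAP_SYSLOG once kptr_restrict=1 (the new default). */
	seq_printf(m, "object: %pK\n", m->private);
	return 0;
}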
@@ -1005,6 +870,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, /* Fallthrough */ case 'S': case 's': + case 'B': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': case 'r': @@ -1035,6 +901,21 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf + vsnprintf(buf, end - buf, ((struct va_format *)ptr)->fmt, *(((struct va_format *)ptr)->va)); + case 'K': + /* + * %pK cannot be used in IRQ context because its test + * for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) { + if (spec.field_width == -1) + spec.field_width = 2 * sizeof(void *); + return string(buf, end, "pK-error", spec); + } + if (!((kptr_restrict == 0) || + (kptr_restrict == 1 && + has_capability_noaudit(current, CAP_SYSLOG)))) + ptr = NULL; + break; } spec.flags |= SMALL; if (spec.field_width == -1) { @@ -1257,6 +1138,7 @@ qualifier: * %ps output the name of a text symbol without offset * %pF output the name of a function pointer with its offset * %pf output the name of a function pointer without its offset + * %pB output the name of a backtrace symbol with its offset * %pR output the address range in a struct resource with decoded flags * %pr output the address range in a struct resource with raw flags * %pM output a 6-byte MAC address with colons @@ -1451,7 +1333,7 @@ EXPORT_SYMBOL(vsnprintf); * @args: Arguments for the format string * * The return value is the number of characters which have been written into - * the @buf not including the trailing '\0'. If @size is <= 0 the function + * the @buf not including the trailing '\0'. If @size is == 0 the function * returns 0. * * Call this function if you are already dealing with a va_list. @@ -1465,7 +1347,11 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) i = vsnprintf(buf, size, fmt, args); - return (i >= size) ? (size - 1) : i; + if (likely(i < size)) + return i; + if (size != 0) + return size - 1; + return 0; } EXPORT_SYMBOL(vscnprintf); @@ -1513,14 +1399,10 @@ int scnprintf(char *buf, size_t size, const char *fmt, ...) int i; va_start(args, fmt); - i = vsnprintf(buf, size, fmt, args); + i = vscnprintf(buf, size, fmt, args); va_end(args); - if (likely(i < size)) - return i; - if (size != 0) - return size - 1; - return 0; + return i; } EXPORT_SYMBOL(scnprintf); diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig new file mode 100644 index 000000000000..60a6088d0e5e --- /dev/null +++ b/lib/xz/Kconfig @@ -0,0 +1,59 @@ +config XZ_DEC + tristate "XZ decompression support" + select CRC32 + help + LZMA2 compression algorithm and BCJ filters are supported using + the .xz file format as the container. For integrity checking, + CRC32 is supported. See Documentation/xz.txt for more information. 
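The scnprintf()/vscnprintf() change above makes the pair safe for the usual accumulation pattern even in the size == 0 corner case: vscnprintf() now returns the number of characters actually stored (at most size - 1, and 0 when there is no room at all), and scnprintf() simply forwards that. A minimal sketch (not part of the patch):

#include <linux/kernel.h>

/*
 * Build a line piece by piece; scnprintf() returns what was actually
 * written, so the running offset can never walk past the end of buf,
 * even when the output is truncated.
 */
static void scnprintf_sketch(void)
{
	char buf[32];
	size_t len = 0;

	len += scnprintf(buf + len, sizeof(buf) - len, "state=%s ", "ready");
	len += scnprintf(buf + len, sizeof(buf) - len, "count=%d", 42);
	pr_info("%s\n", buf);
}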
+ +config XZ_DEC_X86 + bool "x86 BCJ filter decoder" if EXPERT + default y + depends on XZ_DEC + select XZ_DEC_BCJ + +config XZ_DEC_POWERPC + bool "PowerPC BCJ filter decoder" if EXPERT + default y + depends on XZ_DEC + select XZ_DEC_BCJ + +config XZ_DEC_IA64 + bool "IA-64 BCJ filter decoder" if EXPERT + default y + depends on XZ_DEC + select XZ_DEC_BCJ + +config XZ_DEC_ARM + bool "ARM BCJ filter decoder" if EXPERT + default y + depends on XZ_DEC + select XZ_DEC_BCJ + +config XZ_DEC_ARMTHUMB + bool "ARM-Thumb BCJ filter decoder" if EXPERT + default y + depends on XZ_DEC + select XZ_DEC_BCJ + +config XZ_DEC_SPARC + bool "SPARC BCJ filter decoder" if EXPERT + default y + depends on XZ_DEC + select XZ_DEC_BCJ + +config XZ_DEC_BCJ + bool + default n + +config XZ_DEC_TEST + tristate "XZ decompressor tester" + default n + depends on XZ_DEC + help + This allows passing .xz files to the in-kernel XZ decoder via + a character special file. It calculates CRC32 of the decompressed + data and writes diagnostics to the system log. + + Unless you are developing the XZ decoder, you don't need this + and should say N. diff --git a/lib/xz/Makefile b/lib/xz/Makefile new file mode 100644 index 000000000000..a7fa7693f0f3 --- /dev/null +++ b/lib/xz/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_XZ_DEC) += xz_dec.o +xz_dec-y := xz_dec_syms.o xz_dec_stream.o xz_dec_lzma2.o +xz_dec-$(CONFIG_XZ_DEC_BCJ) += xz_dec_bcj.o + +obj-$(CONFIG_XZ_DEC_TEST) += xz_dec_test.o diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c new file mode 100644 index 000000000000..34532d14fd4c --- /dev/null +++ b/lib/xz/xz_crc32.c @@ -0,0 +1,59 @@ +/* + * CRC32 using the polynomial from IEEE-802.3 + * + * Authors: Lasse Collin <lasse.collin@tukaani.org> + * Igor Pavlov <http://7-zip.org/> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +/* + * This is not the fastest implementation, but it is pretty compact. + * The fastest versions of xz_crc32() on modern CPUs without hardware + * accelerated CRC instruction are 3-5 times as fast as this version, + * but they are bigger and use more memory for the lookup table. + */ + +#include "xz_private.h" + +/* + * STATIC_RW_DATA is used in the pre-boot environment on some architectures. + * See <linux/decompress/mm.h> for details. + */ +#ifndef STATIC_RW_DATA +# define STATIC_RW_DATA static +#endif + +STATIC_RW_DATA uint32_t xz_crc32_table[256]; + +XZ_EXTERN void xz_crc32_init(void) +{ + const uint32_t poly = 0xEDB88320; + + uint32_t i; + uint32_t j; + uint32_t r; + + for (i = 0; i < 256; ++i) { + r = i; + for (j = 0; j < 8; ++j) + r = (r >> 1) ^ (poly & ~((r & 1) - 1)); + + xz_crc32_table[i] = r; + } + + return; +} + +XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc = ~crc; + + while (size != 0) { + crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c new file mode 100644 index 000000000000..e51e2558ca9d --- /dev/null +++ b/lib/xz/xz_dec_bcj.c @@ -0,0 +1,561 @@ +/* + * Branch/Call/Jump (BCJ) filter decoders + * + * Authors: Lasse Collin <lasse.collin@tukaani.org> + * Igor Pavlov <http://7-zip.org/> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" + +/* + * The rest of the file is inside this ifdef. It makes things a little more + * convenient when building without support for any BCJ filters. 
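xz_crc32() in the new lib/xz/xz_crc32.c above is the standard bit-reflected CRC-32 (polynomial 0xEDB88320), just built lazily into a 256-entry table. A user-space replica (not part of the patch) can be checked against the well-known check value for "123456789":

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same table-driven, bit-reflected CRC-32 as xz_crc32() above. */
static uint32_t table[256];

static void crc32_init(void)
{
	uint32_t i, j, r;

	for (i = 0; i < 256; ++i) {
		r = i;
		for (j = 0; j < 8; ++j)
			r = (r >> 1) ^ (0xEDB88320 & ~((r & 1) - 1));
		table[i] = r;
	}
}

static uint32_t crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
	crc = ~crc;
	while (size-- != 0)
		crc = table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
	return ~crc;
}

int main(void)
{
	crc32_init();
	/* 0xCBF43926 is the standard CRC-32 check value for "123456789". */
	assert(crc32((const uint8_t *)"123456789", 9, 0) == 0xCBF43926);
	printf("ok\n");
	return 0;
}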
+ */ +#ifdef XZ_DEC_BCJ + +struct xz_dec_bcj { + /* Type of the BCJ filter being used */ + enum { + BCJ_X86 = 4, /* x86 or x86-64 */ + BCJ_POWERPC = 5, /* Big endian only */ + BCJ_IA64 = 6, /* Big or little endian */ + BCJ_ARM = 7, /* Little endian only */ + BCJ_ARMTHUMB = 8, /* Little endian only */ + BCJ_SPARC = 9 /* Big or little endian */ + } type; + + /* + * Return value of the next filter in the chain. We need to preserve + * this information across calls, because we must not call the next + * filter anymore once it has returned XZ_STREAM_END. + */ + enum xz_ret ret; + + /* True if we are operating in single-call mode. */ + bool single_call; + + /* + * Absolute position relative to the beginning of the uncompressed + * data (in a single .xz Block). We care only about the lowest 32 + * bits so this doesn't need to be uint64_t even with big files. + */ + uint32_t pos; + + /* x86 filter state */ + uint32_t x86_prev_mask; + + /* Temporary space to hold the variables from struct xz_buf */ + uint8_t *out; + size_t out_pos; + size_t out_size; + + struct { + /* Amount of already filtered data in the beginning of buf */ + size_t filtered; + + /* Total amount of data currently stored in buf */ + size_t size; + + /* + * Buffer to hold a mix of filtered and unfiltered data. This + * needs to be big enough to hold Alignment + 2 * Look-ahead: + * + * Type Alignment Look-ahead + * x86 1 4 + * PowerPC 4 0 + * IA-64 16 0 + * ARM 4 0 + * ARM-Thumb 2 2 + * SPARC 4 0 + */ + uint8_t buf[16]; + } temp; +}; + +#ifdef XZ_DEC_X86 +/* + * This is used to test the most significant byte of a memory address + * in an x86 instruction. + */ +static inline int bcj_x86_test_msbyte(uint8_t b) +{ + return b == 0x00 || b == 0xFF; +} + +static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const bool mask_to_allowed_status[8] + = { true, true, true, false, true, false, false, false }; + + static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + size_t i; + size_t prev_pos = (size_t)-1; + uint32_t prev_mask = s->x86_prev_mask; + uint32_t src; + uint32_t dest; + uint32_t j; + uint8_t b; + + if (size <= 4) + return 0; + + size -= 4; + for (i = 0; i < size; ++i) { + if ((buf[i] & 0xFE) != 0xE8) + continue; + + prev_pos = i - prev_pos; + if (prev_pos > 3) { + prev_mask = 0; + } else { + prev_mask = (prev_mask << (prev_pos - 1)) & 7; + if (prev_mask != 0) { + b = buf[i + 4 - mask_to_bit_num[prev_mask]]; + if (!mask_to_allowed_status[prev_mask] + || bcj_x86_test_msbyte(b)) { + prev_pos = i; + prev_mask = (prev_mask << 1) | 1; + continue; + } + } + } + + prev_pos = i; + + if (bcj_x86_test_msbyte(buf[i + 4])) { + src = get_unaligned_le32(buf + i + 1); + while (true) { + dest = src - (s->pos + (uint32_t)i + 5); + if (prev_mask == 0) + break; + + j = mask_to_bit_num[prev_mask] * 8; + b = (uint8_t)(dest >> (24 - j)); + if (!bcj_x86_test_msbyte(b)) + break; + + src = dest ^ (((uint32_t)1 << (32 - j)) - 1); + } + + dest &= 0x01FFFFFF; + dest |= (uint32_t)0 - (dest & 0x01000000); + put_unaligned_le32(dest, buf + i + 1); + i += 4; + } else { + prev_mask = (prev_mask << 1) | 1; + } + } + + prev_pos = i - prev_pos; + s->x86_prev_mask = prev_pos > 3 ? 
0 : prev_mask << (prev_pos - 1); + return i; +} +#endif + +#ifdef XZ_DEC_POWERPC +static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr & 0xFC000003) == 0x48000001) { + instr &= 0x03FFFFFC; + instr -= s->pos + (uint32_t)i; + instr &= 0x03FFFFFC; + instr |= 0x48000001; + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_IA64 +static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const uint8_t branch_table[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + /* + * The local variables take a little bit stack space, but it's less + * than what LZMA2 decoder takes, so it doesn't make sense to reduce + * stack usage here without doing that for the LZMA2 decoder too. + */ + + /* Loop counters */ + size_t i; + size_t j; + + /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ + uint32_t slot; + + /* Bitwise offset of the instruction indicated by slot */ + uint32_t bit_pos; + + /* bit_pos split into byte and bit parts */ + uint32_t byte_pos; + uint32_t bit_res; + + /* Address part of an instruction */ + uint32_t addr; + + /* Mask used to detect which instructions to convert */ + uint32_t mask; + + /* 41-bit instruction stored somewhere in the lowest 48 bits */ + uint64_t instr; + + /* Instruction normalized with bit_res for easier manipulation */ + uint64_t norm; + + for (i = 0; i + 16 <= size; i += 16) { + mask = branch_table[buf[i] & 0x1F]; + for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + byte_pos = bit_pos >> 3; + bit_res = bit_pos & 7; + instr = 0; + for (j = 0; j < 6; ++j) + instr |= (uint64_t)(buf[i + j + byte_pos]) + << (8 * j); + + norm = instr >> bit_res; + + if (((norm >> 37) & 0x0F) == 0x05 + && ((norm >> 9) & 0x07) == 0) { + addr = (norm >> 13) & 0x0FFFFF; + addr |= ((uint32_t)(norm >> 36) & 1) << 20; + addr <<= 4; + addr -= s->pos + (uint32_t)i; + addr >>= 4; + + norm &= ~((uint64_t)0x8FFFFF << 13); + norm |= (uint64_t)(addr & 0x0FFFFF) << 13; + norm |= (uint64_t)(addr & 0x100000) + << (36 - 20); + + instr &= (1 << bit_res) - 1; + instr |= norm << bit_res; + + for (j = 0; j < 6; j++) + buf[i + j + byte_pos] + = (uint8_t)(instr >> (8 * j)); + } + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARM +static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 4) { + if (buf[i + 3] == 0xEB) { + addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) + | ((uint32_t)buf[i + 2] << 16); + addr <<= 2; + addr -= s->pos + (uint32_t)i + 8; + addr >>= 2; + buf[i] = (uint8_t)addr; + buf[i + 1] = (uint8_t)(addr >> 8); + buf[i + 2] = (uint8_t)(addr >> 16); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARMTHUMB +static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 2) { + if ((buf[i + 1] & 0xF8) == 0xF0 + && (buf[i + 3] & 0xF8) == 0xF8) { + addr = (((uint32_t)buf[i + 1] & 0x07) << 19) + | ((uint32_t)buf[i] << 11) + | (((uint32_t)buf[i + 3] & 0x07) << 8) + | (uint32_t)buf[i + 2]; + addr <<= 1; + addr -= s->pos + (uint32_t)i + 4; + addr >>= 1; + buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); + buf[i] = (uint8_t)(addr >> 11); + buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 
0x07)); + buf[i + 2] = (uint8_t)addr; + i += 2; + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_SPARC +static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { + instr <<= 2; + instr -= s->pos + (uint32_t)i; + instr >>= 2; + instr = ((uint32_t)0x40000000 - (instr & 0x400000)) + | 0x40000000 | (instr & 0x3FFFFF); + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +/* + * Apply the selected BCJ filter. Update *pos and s->pos to match the amount + * of data that got filtered. + * + * NOTE: This is implemented as a switch statement to avoid using function + * pointers, which could be problematic in the kernel boot code, which must + * avoid pointers to static data (at least on x86). + */ +static void bcj_apply(struct xz_dec_bcj *s, + uint8_t *buf, size_t *pos, size_t size) +{ + size_t filtered; + + buf += *pos; + size -= *pos; + + switch (s->type) { +#ifdef XZ_DEC_X86 + case BCJ_X86: + filtered = bcj_x86(s, buf, size); + break; +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: + filtered = bcj_powerpc(s, buf, size); + break; +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: + filtered = bcj_ia64(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: + filtered = bcj_arm(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: + filtered = bcj_armthumb(s, buf, size); + break; +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: + filtered = bcj_sparc(s, buf, size); + break; +#endif + default: + /* Never reached but silence compiler warnings. */ + filtered = 0; + break; + } + + *pos += filtered; + s->pos += filtered; +} + +/* + * Flush pending filtered data from temp to the output buffer. + * Move the remaining mixture of possibly filtered and unfiltered + * data to the beginning of temp. + */ +static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) +{ + size_t copy_size; + + copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); + memcpy(b->out + b->out_pos, s->temp.buf, copy_size); + b->out_pos += copy_size; + + s->temp.filtered -= copy_size; + s->temp.size -= copy_size; + memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); +} + +/* + * The BCJ filter functions are primitive in sense that they process the + * data in chunks of 1-16 bytes. To hide this issue, this function does + * some buffering. + */ +XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, + struct xz_buf *b) +{ + size_t out_start; + + /* + * Flush pending already filtered data to the output buffer. Return + * immediatelly if we couldn't flush everything, or if the next + * filter in the chain had already returned XZ_STREAM_END. + */ + if (s->temp.filtered > 0) { + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + } + + /* + * If we have more output space than what is currently pending in + * temp, copy the unfiltered data from temp to the output buffer + * and try to fill the output buffer by decoding more data from the + * next filter in the chain. Apply the BCJ filter on the new data + * in the output buffer. If everything cannot be filtered, copy it + * to temp and rewind the output buffer position accordingly. 
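All of the per-architecture helpers above undo the same encoder-side trick: branch/call displacements were rewritten as absolute positions in the uncompressed stream (identical call targets then become identical byte patterns and compress better), and the decoder subtracts the instruction's stream position to restore the relative form. A stand-alone sketch (not part of the patch) of the PowerPC variant, with get/put_unaligned_be32() folded away and made-up numbers:

#include <stdint.h>
#include <stdio.h>

/*
 * Undo the PowerPC BCJ transform on one big-endian "b/bl" word, as in
 * bcj_powerpc() above: keep the 0x48000001 opcode bits and subtract the
 * word's stream position from the 24-bit displacement field.
 */
static uint32_t bcj_powerpc_word(uint32_t instr, uint32_t pos)
{
	if ((instr & 0xFC000003) == 0x48000001) {
		uint32_t disp = instr & 0x03FFFFFC;

		disp = (disp - pos) & 0x03FFFFFC;
		instr = disp | 0x48000001;
	}
	return instr;
}

int main(void)
{
	/* Made-up example: absolute target 0x1000 stored at stream offset 0x400. */
	uint32_t filtered = 0x48000001 | 0x1000;

	printf("restored word: 0x%08x\n", (unsigned)bcj_powerpc_word(filtered, 0x400));
	/* prints 0x48000c01: displacement 0xC00 == 0x1000 - 0x400 */
	return 0;
}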
+ */ + if (s->temp.size < b->out_size - b->out_pos) { + out_start = b->out_pos; + memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); + b->out_pos += s->temp.size; + + s->ret = xz_dec_lzma2_run(lzma2, b); + if (s->ret != XZ_STREAM_END + && (s->ret != XZ_OK || s->single_call)) + return s->ret; + + bcj_apply(s, b->out, &out_start, b->out_pos); + + /* + * As an exception, if the next filter returned XZ_STREAM_END, + * we can do that too, since the last few bytes that remain + * unfiltered are meant to remain unfiltered. + */ + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + + s->temp.size = b->out_pos - out_start; + b->out_pos -= s->temp.size; + memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); + } + + /* + * If we have unfiltered data in temp, try to fill by decoding more + * data from the next filter. Apply the BCJ filter on temp. Then we + * hopefully can fill the actual output buffer by copying filtered + * data from temp. A mix of filtered and unfiltered data may be left + * in temp; it will be taken care on the next call to this function. + */ + if (s->temp.size > 0) { + /* Make b->out{,_pos,_size} temporarily point to s->temp. */ + s->out = b->out; + s->out_pos = b->out_pos; + s->out_size = b->out_size; + b->out = s->temp.buf; + b->out_pos = s->temp.size; + b->out_size = sizeof(s->temp.buf); + + s->ret = xz_dec_lzma2_run(lzma2, b); + + s->temp.size = b->out_pos; + b->out = s->out; + b->out_pos = s->out_pos; + b->out_size = s->out_size; + + if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) + return s->ret; + + bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); + + /* + * If the next filter returned XZ_STREAM_END, we mark that + * everything is filtered, since the last unfiltered bytes + * of the stream are meant to be left as is. + */ + if (s->ret == XZ_STREAM_END) + s->temp.filtered = s->temp.size; + + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + } + + return s->ret; +} + +XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) +{ + struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s != NULL) + s->single_call = single_call; + + return s; +} + +XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) +{ + switch (id) { +#ifdef XZ_DEC_X86 + case BCJ_X86: +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: +#endif + break; + + default: + /* Unsupported Filter ID */ + return XZ_OPTIONS_ERROR; + } + + s->type = id; + s->ret = XZ_OK; + s->pos = 0; + s->x86_prev_mask = 0; + s->temp.filtered = 0; + s->temp.size = 0; + + return XZ_OK; +} + +#endif diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c new file mode 100644 index 000000000000..a6cdc969ea42 --- /dev/null +++ b/lib/xz/xz_dec_lzma2.c @@ -0,0 +1,1171 @@ +/* + * LZMA2 decoder + * + * Authors: Lasse Collin <lasse.collin@tukaani.org> + * Igor Pavlov <http://7-zip.org/> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_lzma2.h" + +/* + * Range decoder initialization eats the first five bytes of each LZMA chunk. + */ +#define RC_INIT_BYTES 5 + +/* + * Minimum number of usable input buffer to safely decode one LZMA symbol. + * The worst case is that we decode 22 bits using probabilities and 26 + * direct bits. This may decode at maximum of 20 bytes of input. 
However, + * lzma_main() does an extra normalization before returning, thus we + * need to put 21 here. + */ +#define LZMA_IN_REQUIRED 21 + +/* + * Dictionary (history buffer) + * + * These are always true: + * start <= pos <= full <= end + * pos <= limit <= end + * + * In multi-call mode, also these are true: + * end == size + * size <= size_max + * allocated <= size + * + * Most of these variables are size_t to support single-call mode, + * in which the dictionary variables address the actual output + * buffer directly. + */ +struct dictionary { + /* Beginning of the history buffer */ + uint8_t *buf; + + /* Old position in buf (before decoding more data) */ + size_t start; + + /* Position in buf */ + size_t pos; + + /* + * How full dictionary is. This is used to detect corrupt input that + * would read beyond the beginning of the uncompressed stream. + */ + size_t full; + + /* Write limit; we don't write to buf[limit] or later bytes. */ + size_t limit; + + /* + * End of the dictionary buffer. In multi-call mode, this is + * the same as the dictionary size. In single-call mode, this + * indicates the size of the output buffer. + */ + size_t end; + + /* + * Size of the dictionary as specified in Block Header. This is used + * together with "full" to detect corrupt input that would make us + * read beyond the beginning of the uncompressed stream. + */ + uint32_t size; + + /* + * Maximum allowed dictionary size in multi-call mode. + * This is ignored in single-call mode. + */ + uint32_t size_max; + + /* + * Amount of memory currently allocated for the dictionary. + * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, + * size_max is always the same as the allocated size.) + */ + uint32_t allocated; + + /* Operation mode */ + enum xz_mode mode; +}; + +/* Range decoder */ +struct rc_dec { + uint32_t range; + uint32_t code; + + /* + * Number of initializing bytes remaining to be read + * by rc_read_init(). + */ + uint32_t init_bytes_left; + + /* + * Buffer from which we read our input. It can be either + * temp.buf or the caller-provided input buffer. + */ + const uint8_t *in; + size_t in_pos; + size_t in_limit; +}; + +/* Probabilities for a length decoder. */ +struct lzma_len_dec { + /* Probability of match length being at least 10 */ + uint16_t choice; + + /* Probability of match length being at least 18 */ + uint16_t choice2; + + /* Probabilities for match lengths 2-9 */ + uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + + /* Probabilities for match lengths 10-17 */ + uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + + /* Probabilities for match lengths 18-273 */ + uint16_t high[LEN_HIGH_SYMBOLS]; +}; + +struct lzma_dec { + /* Distances of latest four matches */ + uint32_t rep0; + uint32_t rep1; + uint32_t rep2; + uint32_t rep3; + + /* Types of the most recently seen LZMA symbols */ + enum lzma_state state; + + /* + * Length of a match. This is updated so that dict_repeat can + * be called again to finish repeating the whole match. + */ + uint32_t len; + + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask dervied from the number of literal + * position bits, and a mask dervied from the number + * position bits) + */ + uint32_t lc; + uint32_t literal_pos_mask; /* (1 << lp) - 1 */ + uint32_t pos_mask; /* (1 << pb) - 1 */ + + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ + uint16_t is_match[STATES][POS_STATES_MAX]; + + /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. 
*/ + uint16_t is_rep[STATES]; + + /* + * If 0, distance of a repeated match is rep0. + * Otherwise check is_rep1. + */ + uint16_t is_rep0[STATES]; + + /* + * If 0, distance of a repeated match is rep1. + * Otherwise check is_rep2. + */ + uint16_t is_rep1[STATES]; + + /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ + uint16_t is_rep2[STATES]; + + /* + * If 1, the repeated match has length of one byte. Otherwise + * the length is decoded from rep_len_decoder. + */ + uint16_t is_rep0_long[STATES][POS_STATES_MAX]; + + /* + * Probability tree for the highest two bits of the match + * distance. There is a separate probability tree for match + * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + */ + uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; + + /* + * Probility trees for additional bits for match distance + * when the distance is in the range [4, 127]. + */ + uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; + + /* + * Probability tree for the lowest four bits of a match + * distance that is equal to or greater than 128. + */ + uint16_t dist_align[ALIGN_SIZE]; + + /* Length of a normal match */ + struct lzma_len_dec match_len_dec; + + /* Length of a repeated match */ + struct lzma_len_dec rep_len_dec; + + /* Probabilities of literals */ + uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; +}; + +struct lzma2_dec { + /* Position in xz_dec_lzma2_run(). */ + enum lzma2_seq { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA_PREPARE, + SEQ_LZMA_RUN, + SEQ_COPY + } sequence; + + /* Next position after decoding the compressed size of the chunk. */ + enum lzma2_seq next_sequence; + + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uint32_t uncompressed; + + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + uint32_t compressed; + + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + bool need_dict_reset; + + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + bool need_props; +}; + +struct xz_dec_lzma2 { + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + struct rc_dec rc; + struct dictionary dict; + struct lzma2_dec lzma2; + struct lzma_dec lzma; + + /* + * Temporary buffer which holds small number of input bytes between + * decoder calls. See lzma2_lzma() for details. + */ + struct { + uint32_t size; + uint8_t buf[3 * LZMA_IN_REQUIRED]; + } temp; +}; + +/************** + * Dictionary * + **************/ + +/* + * Reset the dictionary state. When in single-call mode, set up the beginning + * of the dictionary to point to the actual output buffer. 
+ */ +static void dict_reset(struct dictionary *dict, struct xz_buf *b) +{ + if (DEC_IS_SINGLE(dict->mode)) { + dict->buf = b->out + b->out_pos; + dict->end = b->out_size - b->out_pos; + } + + dict->start = 0; + dict->pos = 0; + dict->limit = 0; + dict->full = 0; +} + +/* Set dictionary write limit */ +static void dict_limit(struct dictionary *dict, size_t out_max) +{ + if (dict->end - dict->pos <= out_max) + dict->limit = dict->end; + else + dict->limit = dict->pos + out_max; +} + +/* Return true if at least one byte can be written into the dictionary. */ +static inline bool dict_has_space(const struct dictionary *dict) +{ + return dict->pos < dict->limit; +} + +/* + * Get a byte from the dictionary at the given distance. The distance is + * assumed to valid, or as a special case, zero when the dictionary is + * still empty. This special case is needed for single-call decoding to + * avoid writing a '\0' to the end of the destination buffer. + */ +static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist) +{ + size_t offset = dict->pos - dist - 1; + + if (dist >= dict->pos) + offset += dict->end; + + return dict->full > 0 ? dict->buf[offset] : 0; +} + +/* + * Put one byte into the dictionary. It is assumed that there is space for it. + */ +static inline void dict_put(struct dictionary *dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (dict->full < dict->pos) + dict->full = dict->pos; +} + +/* + * Repeat given number of bytes from the given distance. If the distance is + * invalid, false is returned. On success, true is returned and *len is + * updated to indicate how many bytes were left to be repeated. + */ +static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist) +{ + size_t back; + uint32_t left; + + if (dist >= dict->full || dist >= dict->size) + return false; + + left = min_t(size_t, dict->limit - dict->pos, *len); + *len -= left; + + back = dict->pos - dist - 1; + if (dist >= dict->pos) + back += dict->end; + + do { + dict->buf[dict->pos++] = dict->buf[back++]; + if (back == dict->end) + back = 0; + } while (--left > 0); + + if (dict->full < dict->pos) + dict->full = dict->pos; + + return true; +} + +/* Copy uncompressed data as is from input to dictionary and output buffers. */ +static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, + uint32_t *left) +{ + size_t copy_size; + + while (*left > 0 && b->in_pos < b->in_size + && b->out_pos < b->out_size) { + copy_size = min(b->in_size - b->in_pos, + b->out_size - b->out_pos); + if (copy_size > dict->end - dict->pos) + copy_size = dict->end - dict->pos; + if (copy_size > *left) + copy_size = *left; + + *left -= copy_size; + + memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + dict->pos += copy_size; + + if (dict->full < dict->pos) + dict->full = dict->pos; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, b->in + b->in_pos, + copy_size); + } + + dict->start = dict->pos; + + b->out_pos += copy_size; + b->in_pos += copy_size; + } +} + +/* + * Flush pending data from dictionary to b->out. It is assumed that there is + * enough space in b->out. This is guaranteed because caller uses dict_limit() + * before decoding data into the dictionary. 
+ */ +static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) +{ + size_t copy_size = dict->pos - dict->start; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); + } + + dict->start = dict->pos; + b->out_pos += copy_size; + return copy_size; +} + +/***************** + * Range decoder * + *****************/ + +/* Reset the range decoder. */ +static void rc_reset(struct rc_dec *rc) +{ + rc->range = (uint32_t)-1; + rc->code = 0; + rc->init_bytes_left = RC_INIT_BYTES; +} + +/* + * Read the first five initial bytes into rc->code if they haven't been + * read already. (Yes, the first byte gets completely ignored.) + */ +static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b) +{ + while (rc->init_bytes_left > 0) { + if (b->in_pos == b->in_size) + return false; + + rc->code = (rc->code << 8) + b->in[b->in_pos++]; + --rc->init_bytes_left; + } + + return true; +} + +/* Return true if there may not be enough input for the next decoding loop. */ +static inline bool rc_limit_exceeded(const struct rc_dec *rc) +{ + return rc->in_pos > rc->in_limit; +} + +/* + * Return true if it is possible (from point of view of range decoder) that + * we have reached the end of the LZMA chunk. + */ +static inline bool rc_is_finished(const struct rc_dec *rc) +{ + return rc->code == 0; +} + +/* Read the next input byte if needed. */ +static __always_inline void rc_normalize(struct rc_dec *rc) +{ + if (rc->range < RC_TOP_VALUE) { + rc->range <<= RC_SHIFT_BITS; + rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; + } +} + +/* + * Decode one bit. In some versions, this function has been splitted in three + * functions so that the compiler is supposed to be able to more easily avoid + * an extra branch. In this particular version of the LZMA decoder, this + * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 + * on x86). Using a non-splitted version results in nicer looking code too. + * + * NOTE: This must return an int. Do not make it return a bool or the speed + * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, + * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) + */ +static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob) +{ + uint32_t bound; + int bit; + + rc_normalize(rc); + bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; + if (rc->code < bound) { + rc->range = bound; + *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; + bit = 0; + } else { + rc->range -= bound; + rc->code -= bound; + *prob -= *prob >> RC_MOVE_BITS; + bit = 1; + } + + return bit; +} + +/* Decode a bittree starting from the most significant bit. */ +static __always_inline uint32_t rc_bittree(struct rc_dec *rc, + uint16_t *probs, uint32_t limit) +{ + uint32_t symbol = 1; + + do { + if (rc_bit(rc, &probs[symbol])) + symbol = (symbol << 1) + 1; + else + symbol <<= 1; + } while (symbol < limit); + + return symbol; +} + +/* Decode a bittree starting from the least significant bit. 
*/ +static __always_inline void rc_bittree_reverse(struct rc_dec *rc, + uint16_t *probs, + uint32_t *dest, uint32_t limit) +{ + uint32_t symbol = 1; + uint32_t i = 0; + + do { + if (rc_bit(rc, &probs[symbol])) { + symbol = (symbol << 1) + 1; + *dest += 1 << i; + } else { + symbol <<= 1; + } + } while (++i < limit); +} + +/* Decode direct bits (fixed fifty-fifty probability) */ +static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit) +{ + uint32_t mask; + + do { + rc_normalize(rc); + rc->range >>= 1; + rc->code -= rc->range; + mask = (uint32_t)0 - (rc->code >> 31); + rc->code += rc->range & mask; + *dest = (*dest << 1) + (mask + 1); + } while (--limit > 0); +} + +/******** + * LZMA * + ********/ + +/* Get pointer to literal coder probability array. */ +static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s) +{ + uint32_t prev_byte = dict_get(&s->dict, 0); + uint32_t low = prev_byte >> (8 - s->lzma.lc); + uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; + return s->lzma.literal[low + high]; +} + +/* Decode a literal (one 8-bit byte) */ +static void lzma_literal(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + uint32_t symbol; + uint32_t match_byte; + uint32_t match_bit; + uint32_t offset; + uint32_t i; + + probs = lzma_literal_probs(s); + + if (lzma_state_is_literal(s->lzma.state)) { + symbol = rc_bittree(&s->rc, probs, 0x100); + } else { + symbol = 1; + match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; + offset = 0x100; + + do { + match_bit = match_byte & offset; + match_byte <<= 1; + i = offset + match_bit + symbol; + + if (rc_bit(&s->rc, &probs[i])) { + symbol = (symbol << 1) + 1; + offset &= match_bit; + } else { + symbol <<= 1; + offset &= ~match_bit; + } + } while (symbol < 0x100); + } + + dict_put(&s->dict, (uint8_t)symbol); + lzma_state_literal(&s->lzma.state); +} + +/* Decode the length of the match into s->lzma.len. */ +static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, + uint32_t pos_state) +{ + uint16_t *probs; + uint32_t limit; + + if (!rc_bit(&s->rc, &l->choice)) { + probs = l->low[pos_state]; + limit = LEN_LOW_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN; + } else { + if (!rc_bit(&s->rc, &l->choice2)) { + probs = l->mid[pos_state]; + limit = LEN_MID_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; + } else { + probs = l->high; + limit = LEN_HIGH_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + + LEN_MID_SYMBOLS; + } + } + + s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; +} + +/* Decode a match. The distance will be stored in s->lzma.rep0. 
*/ +static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint16_t *probs; + uint32_t dist_slot; + uint32_t limit; + + lzma_state_match(&s->lzma.state); + + s->lzma.rep3 = s->lzma.rep2; + s->lzma.rep2 = s->lzma.rep1; + s->lzma.rep1 = s->lzma.rep0; + + lzma_len(s, &s->lzma.match_len_dec, pos_state); + + probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; + dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; + + if (dist_slot < DIST_MODEL_START) { + s->lzma.rep0 = dist_slot; + } else { + limit = (dist_slot >> 1) - 1; + s->lzma.rep0 = 2 + (dist_slot & 1); + + if (dist_slot < DIST_MODEL_END) { + s->lzma.rep0 <<= limit; + probs = s->lzma.dist_special + s->lzma.rep0 + - dist_slot - 1; + rc_bittree_reverse(&s->rc, probs, + &s->lzma.rep0, limit); + } else { + rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); + s->lzma.rep0 <<= ALIGN_BITS; + rc_bittree_reverse(&s->rc, s->lzma.dist_align, + &s->lzma.rep0, ALIGN_BITS); + } + } +} + +/* + * Decode a repeated match. The distance is one of the four most recently + * seen matches. The distance will be stored in s->lzma.rep0. + */ +static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint32_t tmp; + + if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { + if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ + s->lzma.state][pos_state])) { + lzma_state_short_rep(&s->lzma.state); + s->lzma.len = 1; + return; + } + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { + tmp = s->lzma.rep1; + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { + tmp = s->lzma.rep2; + } else { + tmp = s->lzma.rep3; + s->lzma.rep3 = s->lzma.rep2; + } + + s->lzma.rep2 = s->lzma.rep1; + } + + s->lzma.rep1 = s->lzma.rep0; + s->lzma.rep0 = tmp; + } + + lzma_state_long_rep(&s->lzma.state); + lzma_len(s, &s->lzma.rep_len_dec, pos_state); +} + +/* LZMA decoder core */ +static bool lzma_main(struct xz_dec_lzma2 *s) +{ + uint32_t pos_state; + + /* + * If the dictionary was reached during the previous call, try to + * finish the possibly pending repeat in the dictionary. + */ + if (dict_has_space(&s->dict) && s->lzma.len > 0) + dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); + + /* + * Decode more LZMA symbols. One iteration may consume up to + * LZMA_IN_REQUIRED - 1 bytes. + */ + while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { + pos_state = s->dict.pos & s->lzma.pos_mask; + + if (!rc_bit(&s->rc, &s->lzma.is_match[ + s->lzma.state][pos_state])) { + lzma_literal(s); + } else { + if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) + lzma_rep_match(s, pos_state); + else + lzma_match(s, pos_state); + + if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) + return false; + } + } + + /* + * Having the range decoder always normalized when we are outside + * this function makes it easier to correctly handle end of the chunk. + */ + rc_normalize(&s->rc); + + return true; +} + +/* + * Reset the LZMA decoder and range decoder state. Dictionary is nore reset + * here, because LZMA state may be reset without resetting the dictionary. + */ +static void lzma_reset(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + size_t i; + + s->lzma.state = STATE_LIT_LIT; + s->lzma.rep0 = 0; + s->lzma.rep1 = 0; + s->lzma.rep2 = 0; + s->lzma.rep3 = 0; + + /* + * All probabilities are initialized to the same value. This hack + * makes the code smaller by avoiding a separate loop for each + * probability array. 
+ * + * This could be optimized so that only that part of literal + * probabilities that are actually required. In the common case + * we would write 12 KiB less. + */ + probs = s->lzma.is_match[0]; + for (i = 0; i < PROBS_TOTAL; ++i) + probs[i] = RC_BIT_MODEL_TOTAL / 2; + + rc_reset(&s->rc); +} + +/* + * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks + * from the decoded lp and pb values. On success, the LZMA decoder state is + * reset and true is returned. + */ +static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props) +{ + if (props > (4 * 5 + 4) * 9 + 8) + return false; + + s->lzma.pos_mask = 0; + while (props >= 9 * 5) { + props -= 9 * 5; + ++s->lzma.pos_mask; + } + + s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; + + s->lzma.literal_pos_mask = 0; + while (props >= 9) { + props -= 9; + ++s->lzma.literal_pos_mask; + } + + s->lzma.lc = props; + + if (s->lzma.lc + s->lzma.literal_pos_mask > 4) + return false; + + s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; + + lzma_reset(s); + + return true; +} + +/********* + * LZMA2 * + *********/ + +/* + * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't + * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This + * wrapper function takes care of making the LZMA decoder's assumption safe. + * + * As long as there is plenty of input left to be decoded in the current LZMA + * chunk, we decode directly from the caller-supplied input buffer until + * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into + * s->temp.buf, which (hopefully) gets filled on the next call to this + * function. We decode a few bytes from the temporary buffer so that we can + * continue decoding from the caller-supplied input buffer again. 
+ */ +static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) +{ + size_t in_avail; + uint32_t tmp; + + in_avail = b->in_size - b->in_pos; + if (s->temp.size > 0 || s->lzma2.compressed == 0) { + tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; + if (tmp > s->lzma2.compressed - s->temp.size) + tmp = s->lzma2.compressed - s->temp.size; + if (tmp > in_avail) + tmp = in_avail; + + memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); + + if (s->temp.size + tmp == s->lzma2.compressed) { + memzero(s->temp.buf + s->temp.size + tmp, + sizeof(s->temp.buf) + - s->temp.size - tmp); + s->rc.in_limit = s->temp.size + tmp; + } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { + s->temp.size += tmp; + b->in_pos += tmp; + return true; + } else { + s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; + } + + s->rc.in = s->temp.buf; + s->rc.in_pos = 0; + + if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) + return false; + + s->lzma2.compressed -= s->rc.in_pos; + + if (s->rc.in_pos < s->temp.size) { + s->temp.size -= s->rc.in_pos; + memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, + s->temp.size); + return true; + } + + b->in_pos += s->rc.in_pos - s->temp.size; + s->temp.size = 0; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail >= LZMA_IN_REQUIRED) { + s->rc.in = b->in; + s->rc.in_pos = b->in_pos; + + if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) + s->rc.in_limit = b->in_pos + s->lzma2.compressed; + else + s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; + + if (!lzma_main(s)) + return false; + + in_avail = s->rc.in_pos - b->in_pos; + if (in_avail > s->lzma2.compressed) + return false; + + s->lzma2.compressed -= in_avail; + b->in_pos = s->rc.in_pos; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail < LZMA_IN_REQUIRED) { + if (in_avail > s->lzma2.compressed) + in_avail = s->lzma2.compressed; + + memcpy(s->temp.buf, b->in + b->in_pos, in_avail); + s->temp.size = in_avail; + b->in_pos += in_avail; + } + + return true; +} + +/* + * Take care of the LZMA2 control layer, and forward the job of actual LZMA + * decoding or copying of uncompressed chunks to other functions. + */ +XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, + struct xz_buf *b) +{ + uint32_t tmp; + + while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { + switch (s->lzma2.sequence) { + case SEQ_CONTROL: + /* + * LZMA2 control byte + * + * Exact values: + * 0x00 End marker + * 0x01 Dictionary reset followed by + * an uncompressed chunk + * 0x02 Uncompressed chunk (no dictionary reset) + * + * Highest three bits (s->control & 0xE0): + * 0xE0 Dictionary reset, new properties and state + * reset, followed by LZMA compressed chunk + * 0xC0 New properties and state reset, followed + * by LZMA compressed chunk (no dictionary + * reset) + * 0xA0 State reset using old properties, + * followed by LZMA compressed chunk (no + * dictionary reset) + * 0x80 LZMA chunk (no dictionary or state reset) + * + * For LZMA compressed chunks, the lowest five bits + * (s->control & 1F) are the highest bits of the + * uncompressed size (bits 16-20). + * + * A new LZMA2 stream must begin with a dictionary + * reset. The first LZMA chunk must set new + * properties and reset the LZMA state. + * + * Values that don't match anything described above + * are invalid and we return XZ_DATA_ERROR. 
+ */ + tmp = b->in[b->in_pos++]; + + if (tmp == 0x00) + return XZ_STREAM_END; + + if (tmp >= 0xE0 || tmp == 0x01) { + s->lzma2.need_props = true; + s->lzma2.need_dict_reset = false; + dict_reset(&s->dict, b); + } else if (s->lzma2.need_dict_reset) { + return XZ_DATA_ERROR; + } + + if (tmp >= 0x80) { + s->lzma2.uncompressed = (tmp & 0x1F) << 16; + s->lzma2.sequence = SEQ_UNCOMPRESSED_1; + + if (tmp >= 0xC0) { + /* + * When there are new properties, + * state reset is done at + * SEQ_PROPERTIES. + */ + s->lzma2.need_props = false; + s->lzma2.next_sequence + = SEQ_PROPERTIES; + + } else if (s->lzma2.need_props) { + return XZ_DATA_ERROR; + + } else { + s->lzma2.next_sequence + = SEQ_LZMA_PREPARE; + if (tmp >= 0xA0) + lzma_reset(s); + } + } else { + if (tmp > 0x02) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_COMPRESSED_0; + s->lzma2.next_sequence = SEQ_COPY; + } + + break; + + case SEQ_UNCOMPRESSED_1: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = SEQ_COMPRESSED_0; + break; + + case SEQ_COMPRESSED_0: + s->lzma2.compressed + = (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + s->lzma2.compressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = s->lzma2.next_sequence; + break; + + case SEQ_PROPERTIES: + if (!lzma_props(s, b->in[b->in_pos++])) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_LZMA_PREPARE; + + case SEQ_LZMA_PREPARE: + if (s->lzma2.compressed < RC_INIT_BYTES) + return XZ_DATA_ERROR; + + if (!rc_read_init(&s->rc, b)) + return XZ_OK; + + s->lzma2.compressed -= RC_INIT_BYTES; + s->lzma2.sequence = SEQ_LZMA_RUN; + + case SEQ_LZMA_RUN: + /* + * Set dictionary limit to indicate how much we want + * to be encoded at maximum. Decode new data into the + * dictionary. Flush the new data from dictionary to + * b->out. Check if we finished decoding this chunk. + * In case the dictionary got full but we didn't fill + * the output buffer yet, we may run this loop + * multiple times without changing s->lzma2.sequence. 
+ */ + dict_limit(&s->dict, min_t(size_t, + b->out_size - b->out_pos, + s->lzma2.uncompressed)); + if (!lzma2_lzma(s, b)) + return XZ_DATA_ERROR; + + s->lzma2.uncompressed -= dict_flush(&s->dict, b); + + if (s->lzma2.uncompressed == 0) { + if (s->lzma2.compressed > 0 || s->lzma.len > 0 + || !rc_is_finished(&s->rc)) + return XZ_DATA_ERROR; + + rc_reset(&s->rc); + s->lzma2.sequence = SEQ_CONTROL; + + } else if (b->out_pos == b->out_size + || (b->in_pos == b->in_size + && s->temp.size + < s->lzma2.compressed)) { + return XZ_OK; + } + + break; + + case SEQ_COPY: + dict_uncompressed(&s->dict, b, &s->lzma2.compressed); + if (s->lzma2.compressed > 0) + return XZ_OK; + + s->lzma2.sequence = SEQ_CONTROL; + break; + } + } + + return XZ_OK; +} + +XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, + uint32_t dict_max) +{ + struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->dict.mode = mode; + s->dict.size_max = dict_max; + + if (DEC_IS_PREALLOC(mode)) { + s->dict.buf = vmalloc(dict_max); + if (s->dict.buf == NULL) { + kfree(s); + return NULL; + } + } else if (DEC_IS_DYNALLOC(mode)) { + s->dict.buf = NULL; + s->dict.allocated = 0; + } + + return s; +} + +XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) +{ + /* This limits dictionary size to 3 GiB to keep parsing simpler. */ + if (props > 39) + return XZ_OPTIONS_ERROR; + + s->dict.size = 2 + (props & 1); + s->dict.size <<= (props >> 1) + 11; + + if (DEC_IS_MULTI(s->dict.mode)) { + if (s->dict.size > s->dict.size_max) + return XZ_MEMLIMIT_ERROR; + + s->dict.end = s->dict.size; + + if (DEC_IS_DYNALLOC(s->dict.mode)) { + if (s->dict.allocated < s->dict.size) { + vfree(s->dict.buf); + s->dict.buf = vmalloc(s->dict.size); + if (s->dict.buf == NULL) { + s->dict.allocated = 0; + return XZ_MEM_ERROR; + } + } + } + } + + s->lzma.len = 0; + + s->lzma2.sequence = SEQ_CONTROL; + s->lzma2.need_dict_reset = true; + + s->temp.size = 0; + + return XZ_OK; +} + +XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) +{ + if (DEC_IS_MULTI(s->dict.mode)) + vfree(s->dict.buf); + + kfree(s); +} diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c new file mode 100644 index 000000000000..ac809b1e64f7 --- /dev/null +++ b/lib/xz/xz_dec_stream.c @@ -0,0 +1,821 @@ +/* + * .xz Stream decoder + * + * Author: Lasse Collin <lasse.collin@tukaani.org> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_stream.h" + +/* Hash used to validate the Index field */ +struct xz_dec_hash { + vli_type unpadded; + vli_type uncompressed; + uint32_t crc32; +}; + +struct xz_dec { + /* Position in dec_main() */ + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_START, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_UNCOMPRESS, + SEQ_BLOCK_PADDING, + SEQ_BLOCK_CHECK, + SEQ_INDEX, + SEQ_INDEX_PADDING, + SEQ_INDEX_CRC32, + SEQ_STREAM_FOOTER + } sequence; + + /* Position in variable-length integers and Check fields */ + uint32_t pos; + + /* Variable-length integer decoded by dec_vli() */ + vli_type vli; + + /* Saved in_pos and out_pos */ + size_t in_start; + size_t out_start; + + /* CRC32 value in Block or Index */ + uint32_t crc32; + + /* Type of the integrity check calculated from uncompressed data */ + enum xz_check check_type; + + /* Operation mode */ + enum xz_mode mode; + + /* + * True if the next call to xz_dec_run() is allowed to return + * XZ_BUF_ERROR. 
+ */ + bool allow_buf_error; + + /* Information stored in Block Header */ + struct { + /* + * Value stored in the Compressed Size field, or + * VLI_UNKNOWN if Compressed Size is not present. + */ + vli_type compressed; + + /* + * Value stored in the Uncompressed Size field, or + * VLI_UNKNOWN if Uncompressed Size is not present. + */ + vli_type uncompressed; + + /* Size of the Block Header field */ + uint32_t size; + } block_header; + + /* Information collected when decoding Blocks */ + struct { + /* Observed compressed size of the current Block */ + vli_type compressed; + + /* Observed uncompressed size of the current Block */ + vli_type uncompressed; + + /* Number of Blocks decoded so far */ + vli_type count; + + /* + * Hash calculated from the Block sizes. This is used to + * validate the Index field. + */ + struct xz_dec_hash hash; + } block; + + /* Variables needed when verifying the Index field */ + struct { + /* Position in dec_index() */ + enum { + SEQ_INDEX_COUNT, + SEQ_INDEX_UNPADDED, + SEQ_INDEX_UNCOMPRESSED + } sequence; + + /* Size of the Index in bytes */ + vli_type size; + + /* Number of Records (matches block.count in valid files) */ + vli_type count; + + /* + * Hash calculated from the Records (matches block.hash in + * valid files). + */ + struct xz_dec_hash hash; + } index; + + /* + * Temporary buffer needed to hold Stream Header, Block Header, + * and Stream Footer. The Block Header is the biggest (1 KiB) + * so we reserve space according to that. buf[] has to be aligned + * to a multiple of four bytes; the size_t variables before it + * should guarantee this. + */ + struct { + size_t pos; + size_t size; + uint8_t buf[1024]; + } temp; + + struct xz_dec_lzma2 *lzma2; + +#ifdef XZ_DEC_BCJ + struct xz_dec_bcj *bcj; + bool bcj_active; +#endif +}; + +#ifdef XZ_DEC_ANY_CHECK +/* Sizes of the Check field with different Check IDs */ +static const uint8_t check_sizes[16] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 +}; +#endif + +/* + * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller + * must have set s->temp.pos to indicate how much data we are supposed + * to copy into s->temp.buf. Return true once s->temp.pos has reached + * s->temp.size. + */ +static bool fill_temp(struct xz_dec *s, struct xz_buf *b) +{ + size_t copy_size = min_t(size_t, + b->in_size - b->in_pos, s->temp.size - s->temp.pos); + + memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); + b->in_pos += copy_size; + s->temp.pos += copy_size; + + if (s->temp.pos == s->temp.size) { + s->temp.pos = 0; + return true; + } + + return false; +} + +/* Decode a variable-length integer (little-endian base-128 encoding) */ +static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + uint8_t byte; + + if (s->pos == 0) + s->vli = 0; + + while (*in_pos < in_size) { + byte = in[*in_pos]; + ++*in_pos; + + s->vli |= (vli_type)(byte & 0x7F) << s->pos; + + if ((byte & 0x80) == 0) { + /* Don't allow non-minimal encodings. */ + if (byte == 0 && s->pos != 0) + return XZ_DATA_ERROR; + + s->pos = 0; + return XZ_STREAM_END; + } + + s->pos += 7; + if (s->pos == 7 * VLI_BYTES_MAX) + return XZ_DATA_ERROR; + } + + return XZ_OK; +} + +/* + * Decode the Compressed Data field from a Block. Update and validate + * the observed compressed and uncompressed sizes of the Block so that + * they don't exceed the values possibly stored in the Block Header + * (validation assumes that no integer overflow occurs, since vli_type + * is normally uint64_t). 
Update the CRC32 if presence of the CRC32 + * field was indicated in Stream Header. + * + * Once the decoding is finished, validate that the observed sizes match + * the sizes possibly stored in the Block Header. Update the hash and + * Block count, which are later used to validate the Index field. + */ +static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + s->in_start = b->in_pos; + s->out_start = b->out_pos; + +#ifdef XZ_DEC_BCJ + if (s->bcj_active) + ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); + else +#endif + ret = xz_dec_lzma2_run(s->lzma2, b); + + s->block.compressed += b->in_pos - s->in_start; + s->block.uncompressed += b->out_pos - s->out_start; + + /* + * There is no need to separately check for VLI_UNKNOWN, since + * the observed sizes are always smaller than VLI_UNKNOWN. + */ + if (s->block.compressed > s->block_header.compressed + || s->block.uncompressed + > s->block_header.uncompressed) + return XZ_DATA_ERROR; + + if (s->check_type == XZ_CHECK_CRC32) + s->crc32 = xz_crc32(b->out + s->out_start, + b->out_pos - s->out_start, s->crc32); + + if (ret == XZ_STREAM_END) { + if (s->block_header.compressed != VLI_UNKNOWN + && s->block_header.compressed + != s->block.compressed) + return XZ_DATA_ERROR; + + if (s->block_header.uncompressed != VLI_UNKNOWN + && s->block_header.uncompressed + != s->block.uncompressed) + return XZ_DATA_ERROR; + + s->block.hash.unpadded += s->block_header.size + + s->block.compressed; + +#ifdef XZ_DEC_ANY_CHECK + s->block.hash.unpadded += check_sizes[s->check_type]; +#else + if (s->check_type == XZ_CHECK_CRC32) + s->block.hash.unpadded += 4; +#endif + + s->block.hash.uncompressed += s->block.uncompressed; + s->block.hash.crc32 = xz_crc32( + (const uint8_t *)&s->block.hash, + sizeof(s->block.hash), s->block.hash.crc32); + + ++s->block.count; + } + + return ret; +} + +/* Update the Index size and the CRC32 value. */ +static void index_update(struct xz_dec *s, const struct xz_buf *b) +{ + size_t in_used = b->in_pos - s->in_start; + s->index.size += in_used; + s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); +} + +/* + * Decode the Number of Records, Unpadded Size, and Uncompressed Size + * fields from the Index field. That is, Index Padding and CRC32 are not + * decoded by this function. + * + * This can return XZ_OK (more input needed), XZ_STREAM_END (everything + * successfully decoded), or XZ_DATA_ERROR (input is corrupt). + */ +static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + do { + ret = dec_vli(s, b->in, &b->in_pos, b->in_size); + if (ret != XZ_STREAM_END) { + index_update(s, b); + return ret; + } + + switch (s->index.sequence) { + case SEQ_INDEX_COUNT: + s->index.count = s->vli; + + /* + * Validate that the Number of Records field + * indicates the same number of Records as + * there were Blocks in the Stream. + */ + if (s->index.count != s->block.count) + return XZ_DATA_ERROR; + + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + + case SEQ_INDEX_UNPADDED: + s->index.hash.unpadded += s->vli; + s->index.sequence = SEQ_INDEX_UNCOMPRESSED; + break; + + case SEQ_INDEX_UNCOMPRESSED: + s->index.hash.uncompressed += s->vli; + s->index.hash.crc32 = xz_crc32( + (const uint8_t *)&s->index.hash, + sizeof(s->index.hash), + s->index.hash.crc32); + --s->index.count; + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + } + } while (s->index.count > 0); + + return XZ_STREAM_END; +} + +/* + * Validate that the next four input bytes match the value of s->crc32. 
+ * s->pos must be zero when starting to validate the first byte. + */ +static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b) +{ + do { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) + return XZ_DATA_ERROR; + + s->pos += 8; + + } while (s->pos < 32); + + s->crc32 = 0; + s->pos = 0; + + return XZ_STREAM_END; +} + +#ifdef XZ_DEC_ANY_CHECK +/* + * Skip over the Check field when the Check ID is not supported. + * Returns true once the whole Check field has been skipped over. + */ +static bool check_skip(struct xz_dec *s, struct xz_buf *b) +{ + while (s->pos < check_sizes[s->check_type]) { + if (b->in_pos == b->in_size) + return false; + + ++b->in_pos; + ++s->pos; + } + + s->pos = 0; + + return true; +} +#endif + +/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ +static enum xz_ret dec_stream_header(struct xz_dec *s) +{ + if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) + return XZ_FORMAT_ERROR; + + if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) + != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) + return XZ_DATA_ERROR; + + if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) + return XZ_OPTIONS_ERROR; + + /* + * Of integrity checks, we support only none (Check ID = 0) and + * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, + * we will accept other check types too, but then the check won't + * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. + */ + s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; + +#ifdef XZ_DEC_ANY_CHECK + if (s->check_type > XZ_CHECK_MAX) + return XZ_OPTIONS_ERROR; + + if (s->check_type > XZ_CHECK_CRC32) + return XZ_UNSUPPORTED_CHECK; +#else + if (s->check_type > XZ_CHECK_CRC32) + return XZ_OPTIONS_ERROR; +#endif + + return XZ_OK; +} + +/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ +static enum xz_ret dec_stream_footer(struct xz_dec *s) +{ + if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) + return XZ_DATA_ERROR; + + if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) + return XZ_DATA_ERROR; + + /* + * Validate Backward Size. Note that we never added the size of the + * Index CRC32 field to s->index.size, thus we use s->index.size / 4 + * instead of s->index.size / 4 - 1. + */ + if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) + return XZ_DATA_ERROR; + + if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) + return XZ_DATA_ERROR; + + /* + * Use XZ_STREAM_END instead of XZ_OK to be more convenient + * for the caller. + */ + return XZ_STREAM_END; +} + +/* Decode the Block Header and initialize the filter chain. */ +static enum xz_ret dec_block_header(struct xz_dec *s) +{ + enum xz_ret ret; + + /* + * Validate the CRC32. We know that the temp buffer is at least + * eight bytes so this is safe. + */ + s->temp.size -= 4; + if (xz_crc32(s->temp.buf, s->temp.size, 0) + != get_le32(s->temp.buf + s->temp.size)) + return XZ_DATA_ERROR; + + s->temp.pos = 2; + + /* + * Catch unsupported Block Flags. We support only one or two filters + * in the chain, so we catch that with the same test. 
+ */ +#ifdef XZ_DEC_BCJ + if (s->temp.buf[1] & 0x3E) +#else + if (s->temp.buf[1] & 0x3F) +#endif + return XZ_OPTIONS_ERROR; + + /* Compressed Size */ + if (s->temp.buf[1] & 0x40) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.compressed = s->vli; + } else { + s->block_header.compressed = VLI_UNKNOWN; + } + + /* Uncompressed Size */ + if (s->temp.buf[1] & 0x80) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.uncompressed = s->vli; + } else { + s->block_header.uncompressed = VLI_UNKNOWN; + } + +#ifdef XZ_DEC_BCJ + /* If there are two filters, the first one must be a BCJ filter. */ + s->bcj_active = s->temp.buf[1] & 0x01; + if (s->bcj_active) { + if (s->temp.size - s->temp.pos < 2) + return XZ_OPTIONS_ERROR; + + ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* + * We don't support custom start offset, + * so Size of Properties must be zero. + */ + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + } +#endif + + /* Valid Filter Flags always take at least two bytes. */ + if (s->temp.size - s->temp.pos < 2) + return XZ_DATA_ERROR; + + /* Filter ID = LZMA2 */ + if (s->temp.buf[s->temp.pos++] != 0x21) + return XZ_OPTIONS_ERROR; + + /* Size of Properties = 1-byte Filter Properties */ + if (s->temp.buf[s->temp.pos++] != 0x01) + return XZ_OPTIONS_ERROR; + + /* Filter Properties contains LZMA2 dictionary size. */ + if (s->temp.size - s->temp.pos < 1) + return XZ_DATA_ERROR; + + ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* The rest must be Header Padding. */ + while (s->temp.pos < s->temp.size) + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + + s->temp.pos = 0; + s->block.compressed = 0; + s->block.uncompressed = 0; + + return XZ_OK; +} + +static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + /* + * Store the start position for the case when we are in the middle + * of the Index field. + */ + s->in_start = b->in_pos; + + while (true) { + switch (s->sequence) { + case SEQ_STREAM_HEADER: + /* + * Stream Header is copied to s->temp, and then + * decoded from there. This way if the caller + * gives us only little input at a time, we can + * still keep the Stream Header decoding code + * simple. Similar approach is used in many places + * in this file. + */ + if (!fill_temp(s, b)) + return XZ_OK; + + /* + * If dec_stream_header() returns + * XZ_UNSUPPORTED_CHECK, it is still possible + * to continue decoding if working in multi-call + * mode. Thus, update s->sequence before calling + * dec_stream_header(). + */ + s->sequence = SEQ_BLOCK_START; + + ret = dec_stream_header(s); + if (ret != XZ_OK) + return ret; + + case SEQ_BLOCK_START: + /* We need one byte of input to continue. */ + if (b->in_pos == b->in_size) + return XZ_OK; + + /* See if this is the beginning of the Index field. */ + if (b->in[b->in_pos] == 0) { + s->in_start = b->in_pos++; + s->sequence = SEQ_INDEX; + break; + } + + /* + * Calculate the size of the Block Header and + * prepare to decode it. 
+ */ + s->block_header.size + = ((uint32_t)b->in[b->in_pos] + 1) * 4; + + s->temp.size = s->block_header.size; + s->temp.pos = 0; + s->sequence = SEQ_BLOCK_HEADER; + + case SEQ_BLOCK_HEADER: + if (!fill_temp(s, b)) + return XZ_OK; + + ret = dec_block_header(s); + if (ret != XZ_OK) + return ret; + + s->sequence = SEQ_BLOCK_UNCOMPRESS; + + case SEQ_BLOCK_UNCOMPRESS: + ret = dec_block(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_BLOCK_PADDING; + + case SEQ_BLOCK_PADDING: + /* + * Size of Compressed Data + Block Padding + * must be a multiple of four. We don't need + * s->block.compressed for anything else + * anymore, so we use it here to test the size + * of the Block Padding field. + */ + while (s->block.compressed & 3) { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + + ++s->block.compressed; + } + + s->sequence = SEQ_BLOCK_CHECK; + + case SEQ_BLOCK_CHECK: + if (s->check_type == XZ_CHECK_CRC32) { + ret = crc32_validate(s, b); + if (ret != XZ_STREAM_END) + return ret; + } +#ifdef XZ_DEC_ANY_CHECK + else if (!check_skip(s, b)) { + return XZ_OK; + } +#endif + + s->sequence = SEQ_BLOCK_START; + break; + + case SEQ_INDEX: + ret = dec_index(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_INDEX_PADDING; + + case SEQ_INDEX_PADDING: + while ((s->index.size + (b->in_pos - s->in_start)) + & 3) { + if (b->in_pos == b->in_size) { + index_update(s, b); + return XZ_OK; + } + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + } + + /* Finish the CRC32 value and Index size. */ + index_update(s, b); + + /* Compare the hashes to validate the Index field. */ + if (!memeq(&s->block.hash, &s->index.hash, + sizeof(s->block.hash))) + return XZ_DATA_ERROR; + + s->sequence = SEQ_INDEX_CRC32; + + case SEQ_INDEX_CRC32: + ret = crc32_validate(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->temp.size = STREAM_HEADER_SIZE; + s->sequence = SEQ_STREAM_FOOTER; + + case SEQ_STREAM_FOOTER: + if (!fill_temp(s, b)) + return XZ_OK; + + return dec_stream_footer(s); + } + } + + /* Never reached */ +} + +/* + * xz_dec_run() is a wrapper for dec_main() to handle some special cases in + * multi-call and single-call decoding. + * + * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we + * are not going to make any progress anymore. This is to prevent the caller + * from calling us infinitely when the input file is truncated or otherwise + * corrupt. Since zlib-style API allows that the caller fills the input buffer + * only when the decoder doesn't produce any new output, we have to be careful + * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only + * after the second consecutive call to xz_dec_run() that makes no progress. + * + * In single-call mode, if we couldn't decode everything and no error + * occurred, either the input is truncated or the output buffer is too small. + * Since we know that the last input byte never produces any output, we know + * that if all the input was consumed and decoding wasn't finished, the file + * must be corrupt. Otherwise the output buffer has to be too small or the + * file is corrupt in a way that decoding it produces too big output. + * + * If single-call decoding fails, we reset b->in_pos and b->out_pos back to + * their original values. 
This is because with some filter chains there won't + * be any valid uncompressed data in the output buffer unless the decoding + * actually succeeds (that's the price to pay of using the output buffer as + * the workspace). + */ +XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) +{ + size_t in_start; + size_t out_start; + enum xz_ret ret; + + if (DEC_IS_SINGLE(s->mode)) + xz_dec_reset(s); + + in_start = b->in_pos; + out_start = b->out_pos; + ret = dec_main(s, b); + + if (DEC_IS_SINGLE(s->mode)) { + if (ret == XZ_OK) + ret = b->in_pos == b->in_size + ? XZ_DATA_ERROR : XZ_BUF_ERROR; + + if (ret != XZ_STREAM_END) { + b->in_pos = in_start; + b->out_pos = out_start; + } + + } else if (ret == XZ_OK && in_start == b->in_pos + && out_start == b->out_pos) { + if (s->allow_buf_error) + ret = XZ_BUF_ERROR; + + s->allow_buf_error = true; + } else { + s->allow_buf_error = false; + } + + return ret; +} + +XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) +{ + struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->mode = mode; + +#ifdef XZ_DEC_BCJ + s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); + if (s->bcj == NULL) + goto error_bcj; +#endif + + s->lzma2 = xz_dec_lzma2_create(mode, dict_max); + if (s->lzma2 == NULL) + goto error_lzma2; + + xz_dec_reset(s); + return s; + +error_lzma2: +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +error_bcj: +#endif + kfree(s); + return NULL; +} + +XZ_EXTERN void xz_dec_reset(struct xz_dec *s) +{ + s->sequence = SEQ_STREAM_HEADER; + s->allow_buf_error = false; + s->pos = 0; + s->crc32 = 0; + memzero(&s->block, sizeof(s->block)); + memzero(&s->index, sizeof(s->index)); + s->temp.pos = 0; + s->temp.size = STREAM_HEADER_SIZE; +} + +XZ_EXTERN void xz_dec_end(struct xz_dec *s) +{ + if (s != NULL) { + xz_dec_lzma2_end(s->lzma2); +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +#endif + kfree(s); + } +} diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c new file mode 100644 index 000000000000..32eb3c03aede --- /dev/null +++ b/lib/xz/xz_dec_syms.c @@ -0,0 +1,26 @@ +/* + * XZ decoder module information + * + * Author: Lasse Collin <lasse.collin@tukaani.org> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include <linux/module.h> +#include <linux/xz.h> + +EXPORT_SYMBOL(xz_dec_init); +EXPORT_SYMBOL(xz_dec_reset); +EXPORT_SYMBOL(xz_dec_run); +EXPORT_SYMBOL(xz_dec_end); + +MODULE_DESCRIPTION("XZ decompressor"); +MODULE_VERSION("1.0"); +MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov"); + +/* + * This code is in the public domain, but in Linux it's simplest to just + * say it's GPL and consider the authors as the copyright holders. + */ +MODULE_LICENSE("GPL"); diff --git a/lib/xz/xz_dec_test.c b/lib/xz/xz_dec_test.c new file mode 100644 index 000000000000..da28a19d6c98 --- /dev/null +++ b/lib/xz/xz_dec_test.c @@ -0,0 +1,220 @@ +/* + * XZ decoder tester + * + * Author: Lasse Collin <lasse.collin@tukaani.org> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/crc32.h> +#include <linux/xz.h> + +/* Maximum supported dictionary size */ +#define DICT_MAX (1 << 20) + +/* Device name to pass to register_chrdev(). 
*/ +#define DEVICE_NAME "xz_dec_test" + +/* Dynamically allocated device major number */ +static int device_major; + +/* + * We reuse the same decoder state, and thus can decode only one + * file at a time. + */ +static bool device_is_open; + +/* XZ decoder state */ +static struct xz_dec *state; + +/* + * Return value of xz_dec_run(). We need to avoid calling xz_dec_run() after + * it has returned XZ_STREAM_END, so we make this static. + */ +static enum xz_ret ret; + +/* + * Input and output buffers. The input buffer is used as a temporary safe + * place for the data coming from the userspace. + */ +static uint8_t buffer_in[1024]; +static uint8_t buffer_out[1024]; + +/* + * Structure to pass the input and output buffers to the XZ decoder. + * A few of the fields are never modified so we initialize them here. + */ +static struct xz_buf buffers = { + .in = buffer_in, + .out = buffer_out, + .out_size = sizeof(buffer_out) +}; + +/* + * CRC32 of uncompressed data. This is used to give the user a simple way + * to check that the decoder produces correct output. + */ +static uint32_t crc; + +static int xz_dec_test_open(struct inode *i, struct file *f) +{ + if (device_is_open) + return -EBUSY; + + device_is_open = true; + + xz_dec_reset(state); + ret = XZ_OK; + crc = 0xFFFFFFFF; + + buffers.in_pos = 0; + buffers.in_size = 0; + buffers.out_pos = 0; + + printk(KERN_INFO DEVICE_NAME ": opened\n"); + return 0; +} + +static int xz_dec_test_release(struct inode *i, struct file *f) +{ + device_is_open = false; + + if (ret == XZ_OK) + printk(KERN_INFO DEVICE_NAME ": input was truncated\n"); + + printk(KERN_INFO DEVICE_NAME ": closed\n"); + return 0; +} + +/* + * Decode the data given to us from the userspace. CRC32 of the uncompressed + * data is calculated and is printed at the end of successful decoding. The + * uncompressed data isn't stored anywhere for further use. + * + * The .xz file must have exactly one Stream and no Stream Padding. The data + * after the first Stream is considered to be garbage. 
+ */ +static ssize_t xz_dec_test_write(struct file *file, const char __user *buf, + size_t size, loff_t *pos) +{ + size_t remaining; + + if (ret != XZ_OK) { + if (size > 0) + printk(KERN_INFO DEVICE_NAME ": %zu bytes of " + "garbage at the end of the file\n", + size); + + return -ENOSPC; + } + + printk(KERN_INFO DEVICE_NAME ": decoding %zu bytes of input\n", + size); + + remaining = size; + while ((remaining > 0 || buffers.out_pos == buffers.out_size) + && ret == XZ_OK) { + if (buffers.in_pos == buffers.in_size) { + buffers.in_pos = 0; + buffers.in_size = min(remaining, sizeof(buffer_in)); + if (copy_from_user(buffer_in, buf, buffers.in_size)) + return -EFAULT; + + buf += buffers.in_size; + remaining -= buffers.in_size; + } + + buffers.out_pos = 0; + ret = xz_dec_run(state, &buffers); + crc = crc32(crc, buffer_out, buffers.out_pos); + } + + switch (ret) { + case XZ_OK: + printk(KERN_INFO DEVICE_NAME ": XZ_OK\n"); + return size; + + case XZ_STREAM_END: + printk(KERN_INFO DEVICE_NAME ": XZ_STREAM_END, " + "CRC32 = 0x%08X\n", ~crc); + return size - remaining - (buffers.in_size - buffers.in_pos); + + case XZ_MEMLIMIT_ERROR: + printk(KERN_INFO DEVICE_NAME ": XZ_MEMLIMIT_ERROR\n"); + break; + + case XZ_FORMAT_ERROR: + printk(KERN_INFO DEVICE_NAME ": XZ_FORMAT_ERROR\n"); + break; + + case XZ_OPTIONS_ERROR: + printk(KERN_INFO DEVICE_NAME ": XZ_OPTIONS_ERROR\n"); + break; + + case XZ_DATA_ERROR: + printk(KERN_INFO DEVICE_NAME ": XZ_DATA_ERROR\n"); + break; + + case XZ_BUF_ERROR: + printk(KERN_INFO DEVICE_NAME ": XZ_BUF_ERROR\n"); + break; + + default: + printk(KERN_INFO DEVICE_NAME ": Bug detected!\n"); + break; + } + + return -EIO; +} + +/* Allocate the XZ decoder state and register the character device. */ +static int __init xz_dec_test_init(void) +{ + static const struct file_operations fileops = { + .owner = THIS_MODULE, + .open = &xz_dec_test_open, + .release = &xz_dec_test_release, + .write = &xz_dec_test_write + }; + + state = xz_dec_init(XZ_PREALLOC, DICT_MAX); + if (state == NULL) + return -ENOMEM; + + device_major = register_chrdev(0, DEVICE_NAME, &fileops); + if (device_major < 0) { + xz_dec_end(state); + return device_major; + } + + printk(KERN_INFO DEVICE_NAME ": module loaded\n"); + printk(KERN_INFO DEVICE_NAME ": Create a device node with " + "'mknod " DEVICE_NAME " c %d 0' and write .xz files " + "to it.\n", device_major); + return 0; +} + +static void __exit xz_dec_test_exit(void) +{ + unregister_chrdev(device_major, DEVICE_NAME); + xz_dec_end(state); + printk(KERN_INFO DEVICE_NAME ": module unloaded\n"); +} + +module_init(xz_dec_test_init); +module_exit(xz_dec_test_exit); + +MODULE_DESCRIPTION("XZ decompressor tester"); +MODULE_VERSION("1.0"); +MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org>"); + +/* + * This code is in the public domain, but in Linux it's simplest to just + * say it's GPL and consider the authors as the copyright holders. + */ +MODULE_LICENSE("GPL"); diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h new file mode 100644 index 000000000000..071d67bee9f5 --- /dev/null +++ b/lib/xz/xz_lzma2.h @@ -0,0 +1,204 @@ +/* + * LZMA2 definitions + * + * Authors: Lasse Collin <lasse.collin@tukaani.org> + * Igor Pavlov <http://7-zip.org/> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +#ifndef XZ_LZMA2_H +#define XZ_LZMA2_H + +/* Range coder constants */ +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (1 << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + +/* + * Maximum number of position states. A position state is the lowest pb + * number of bits of the current uncompressed offset. In some places there + * are different sets of probabilities for different position states. + */ +#define POS_STATES_MAX (1 << 4) + +/* + * This enum is used to track which LZMA symbols have occurred most recently + * and in which order. This information is used to predict the next symbol. + * + * Symbols: + * - Literal: One 8-bit byte + * - Match: Repeat a chunk of data at some distance + * - Long repeat: Multi-byte match at a recently seen distance + * - Short repeat: One-byte repeat at a recently seen distance + * + * The symbol names are in from STATE_oldest_older_previous. REP means + * either short or long repeated match, and NONLIT means any non-literal. + */ +enum lzma_state { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP +}; + +/* Total number of states */ +#define STATES 12 + +/* The lowest 7 states indicate that the previous state was a literal. */ +#define LIT_STATES 7 + +/* Indicate that the latest symbol was a literal. */ +static inline void lzma_state_literal(enum lzma_state *state) +{ + if (*state <= STATE_SHORTREP_LIT_LIT) + *state = STATE_LIT_LIT; + else if (*state <= STATE_LIT_SHORTREP) + *state -= 3; + else + *state -= 6; +} + +/* Indicate that the latest symbol was a match. */ +static inline void lzma_state_match(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH; +} + +/* Indicate that the latest state was a long repeated match. */ +static inline void lzma_state_long_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP; +} + +/* Indicate that the latest symbol was a short match. */ +static inline void lzma_state_short_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP; +} + +/* Test if the previous symbol was a literal. */ +static inline bool lzma_state_is_literal(enum lzma_state state) +{ + return state < LIT_STATES; +} + +/* Each literal coder is divided in three sections: + * - 0x001-0x0FF: Without match byte + * - 0x101-0x1FF: With match byte; match bit is 0 + * - 0x201-0x2FF: With match byte; match bit is 1 + * + * Match byte is used when the previous LZMA symbol was something else than + * a literal (that is, it was some kind of match). + */ +#define LITERAL_CODER_SIZE 0x300 + +/* Maximum number of literal coders */ +#define LITERAL_CODERS_MAX (1 << 4) + +/* Minimum length of a match is two bytes. */ +#define MATCH_LEN_MIN 2 + +/* Match length is encoded with 4, 5, or 10 bits. 
+ * + * Length Bits + * 2-9 4 = Choice=0 + 3 bits + * 10-17 5 = Choice=1 + Choice2=0 + 3 bits + * 18-273 10 = Choice=1 + Choice2=1 + 8 bits + */ +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +/* + * Maximum length of a match is 273 which is a result of the encoding + * described above. + */ +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + +/* + * Different sets of probabilities are used for match distances that have + * very short match length: Lengths of 2, 3, and 4 bytes have a separate + * set of probabilities for each length. The matches with longer length + * use a shared set of probabilities. + */ +#define DIST_STATES 4 + +/* + * Get the index of the appropriate probability array for decoding + * the distance slot. + */ +static inline uint32_t lzma_get_dist_state(uint32_t len) +{ + return len < DIST_STATES + MATCH_LEN_MIN + ? len - MATCH_LEN_MIN : DIST_STATES - 1; +} + +/* + * The highest two bits of a 32-bit match distance are encoded using six bits. + * This six-bit value is called a distance slot. This way encoding a 32-bit + * value takes 6-36 bits, larger values taking more bits. + */ +#define DIST_SLOT_BITS 6 +#define DIST_SLOTS (1 << DIST_SLOT_BITS) + +/* Match distances up to 127 are fully encoded using probabilities. Since + * the highest two bits (distance slot) are always encoded using six bits, + * the distances 0-3 don't need any additional bits to encode, since the + * distance slot itself is the same as the actual distance. DIST_MODEL_START + * indicates the first distance slot where at least one additional bit is + * needed. + */ +#define DIST_MODEL_START 4 + +/* + * Match distances greater than 127 are encoded in three pieces: + * - distance slot: the highest two bits + * - direct bits: 2-26 bits below the highest two bits + * - alignment bits: four lowest bits + * + * Direct bits don't use any probabilities. + * + * The distance slot value of 14 is for distances 128-191. + */ +#define DIST_MODEL_END 14 + +/* Distance slots that indicate a distance <= 127. */ +#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +/* + * For match distances greater than 127, only the highest two bits and the + * lowest four bits (alignment) is encoded using probabilities. + */ +#define ALIGN_BITS 4 +#define ALIGN_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_SIZE - 1) + +/* Total number of all probability variables */ +#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE) + +/* + * LZMA remembers the four most recent match distances. Reusing these + * distances tends to take less space than re-encoding the actual + * distance value. + */ +#define REPS 4 + +#endif diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h new file mode 100644 index 000000000000..a65633e06962 --- /dev/null +++ b/lib/xz/xz_private.h @@ -0,0 +1,156 @@ +/* + * Private includes and definitions + * + * Author: Lasse Collin <lasse.collin@tukaani.org> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_PRIVATE_H +#define XZ_PRIVATE_H + +#ifdef __KERNEL__ +# include <linux/xz.h> +# include <asm/byteorder.h> +# include <asm/unaligned.h> + /* XZ_PREBOOT may be defined only via decompress_unxz.c. 
*/ +# ifndef XZ_PREBOOT +# include <linux/slab.h> +# include <linux/vmalloc.h> +# include <linux/string.h> +# ifdef CONFIG_XZ_DEC_X86 +# define XZ_DEC_X86 +# endif +# ifdef CONFIG_XZ_DEC_POWERPC +# define XZ_DEC_POWERPC +# endif +# ifdef CONFIG_XZ_DEC_IA64 +# define XZ_DEC_IA64 +# endif +# ifdef CONFIG_XZ_DEC_ARM +# define XZ_DEC_ARM +# endif +# ifdef CONFIG_XZ_DEC_ARMTHUMB +# define XZ_DEC_ARMTHUMB +# endif +# ifdef CONFIG_XZ_DEC_SPARC +# define XZ_DEC_SPARC +# endif +# define memeq(a, b, size) (memcmp(a, b, size) == 0) +# define memzero(buf, size) memset(buf, 0, size) +# endif +# define get_le32(p) le32_to_cpup((const uint32_t *)(p)) +#else + /* + * For userspace builds, use a separate header to define the required + * macros and functions. This makes it easier to adapt the code into + * different environments and avoids clutter in the Linux kernel tree. + */ +# include "xz_config.h" +#endif + +/* If no specific decoding mode is requested, enable support for all modes. */ +#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ + && !defined(XZ_DEC_DYNALLOC) +# define XZ_DEC_SINGLE +# define XZ_DEC_PREALLOC +# define XZ_DEC_DYNALLOC +#endif + +/* + * The DEC_IS_foo(mode) macros are used in "if" statements. If only some + * of the supported modes are enabled, these macros will evaluate to true or + * false at compile time and thus allow the compiler to omit unneeded code. + */ +#ifdef XZ_DEC_SINGLE +# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) +#else +# define DEC_IS_SINGLE(mode) (false) +#endif + +#ifdef XZ_DEC_PREALLOC +# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) +#else +# define DEC_IS_PREALLOC(mode) (false) +#endif + +#ifdef XZ_DEC_DYNALLOC +# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) +#else +# define DEC_IS_DYNALLOC(mode) (false) +#endif + +#if !defined(XZ_DEC_SINGLE) +# define DEC_IS_MULTI(mode) (true) +#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) +# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) +#else +# define DEC_IS_MULTI(mode) (false) +#endif + +/* + * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. + * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. + */ +#ifndef XZ_DEC_BCJ +# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ + || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ + || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ + || defined(XZ_DEC_SPARC) +# define XZ_DEC_BCJ +# endif +#endif + +/* + * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used + * before calling xz_dec_lzma2_run(). + */ +XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, + uint32_t dict_max); + +/* + * Decode the LZMA2 properties (one byte) and reset the decoder. Return + * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not + * big enough, and XZ_OPTIONS_ERROR if props indicates something that this + * decoder doesn't support. + */ +XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, + uint8_t props); + +/* Decode raw LZMA2 stream from b->in to b->out. */ +XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, + struct xz_buf *b); + +/* Free the memory allocated for the LZMA2 decoder. */ +XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s); + +#ifdef XZ_DEC_BCJ +/* + * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before + * calling xz_dec_bcj_run(). + */ +XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); + +/* + * Decode the Filter ID of a BCJ filter. 
This implementation doesn't + * support custom start offsets, so no decoding of Filter Properties + * is needed. Returns XZ_OK if the given Filter ID is supported. + * Otherwise XZ_OPTIONS_ERROR is returned. + */ +XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id); + +/* + * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is + * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() + * must be called directly. + */ +XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, + struct xz_buf *b); + +/* Free the memory allocated for the BCJ filters. */ +#define xz_dec_bcj_end(s) kfree(s) +#endif + +#endif diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h new file mode 100644 index 000000000000..66cb5a7055ec --- /dev/null +++ b/lib/xz/xz_stream.h @@ -0,0 +1,62 @@ +/* + * Definitions for handling the .xz file format + * + * Author: Lasse Collin <lasse.collin@tukaani.org> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_STREAM_H +#define XZ_STREAM_H + +#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32 +# include <linux/crc32.h> +# undef crc32 +# define xz_crc32(buf, size, crc) \ + (~crc32_le(~(uint32_t)(crc), buf, size)) +#endif + +/* + * See the .xz file format specification at + * http://tukaani.org/xz/xz-file-format.txt + * to understand the container format. + */ + +#define STREAM_HEADER_SIZE 12 + +#define HEADER_MAGIC "\3757zXZ" +#define HEADER_MAGIC_SIZE 6 + +#define FOOTER_MAGIC "YZ" +#define FOOTER_MAGIC_SIZE 2 + +/* + * Variable-length integer can hold a 63-bit unsigned integer or a special + * value indicating that the value is unknown. + * + * Experimental: vli_type can be defined to uint32_t to save a few bytes + * in code size (no effect on speed). Doing so limits the uncompressed and + * compressed size of the file to less than 256 MiB and may also weaken + * error detection slightly. + */ +typedef uint64_t vli_type; + +#define VLI_MAX ((vli_type)-1 / 2) +#define VLI_UNKNOWN ((vli_type)-1) + +/* Maximum encoded size of a VLI */ +#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7) + +/* Integrity Check types */ +enum xz_check { + XZ_CHECK_NONE = 0, + XZ_CHECK_CRC32 = 1, + XZ_CHECK_CRC64 = 4, + XZ_CHECK_SHA256 = 10 +}; + +/* Maximum possible Check ID */ +#define XZ_CHECK_MAX 15 + +#endif diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index 46a31e5f49c3..d63381e8e333 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -176,6 +176,7 @@ int zlib_deflateInit2( deflate_state *s; int noheader = 0; deflate_workspace *mem; + char *next; ush *overlay; /* We overlay pending_buf and d_buf+l_buf. This works since the average @@ -199,6 +200,21 @@ int zlib_deflateInit2( strategy < 0 || strategy > Z_HUFFMAN_ONLY) { return Z_STREAM_ERROR; } + + /* + * Direct the workspace's pointers to the chunks that were allocated + * along with the deflate_workspace struct. + */ + next = (char *) mem; + next += sizeof(*mem); + mem->window_memory = (Byte *) next; + next += zlib_deflate_window_memsize(windowBits); + mem->prev_memory = (Pos *) next; + next += zlib_deflate_prev_memsize(windowBits); + mem->head_memory = (Pos *) next; + next += zlib_deflate_head_memsize(memLevel); + mem->overlay_memory = next; + s = (deflate_state *) &(mem->deflate_memory); strm->state = (struct internal_state *)s; s->strm = strm; @@ -1247,7 +1263,18 @@ static block_state deflate_slow( return flush == Z_FINISH ? 
finish_done : block_done; } -int zlib_deflate_workspacesize(void) +int zlib_deflate_workspacesize(int windowBits, int memLevel) { - return sizeof(deflate_workspace); + if (windowBits < 0) /* undocumented feature: suppress zlib header */ + windowBits = -windowBits; + + /* Since the return value is typically passed to vmalloc() unchecked... */ + BUG_ON(memLevel < 1 || memLevel > MAX_MEM_LEVEL || windowBits < 9 || + windowBits > 15); + + return sizeof(deflate_workspace) + + zlib_deflate_window_memsize(windowBits) + + zlib_deflate_prev_memsize(windowBits) + + zlib_deflate_head_memsize(memLevel) + + zlib_deflate_overlay_memsize(memLevel); } diff --git a/lib/zlib_deflate/defutil.h b/lib/zlib_deflate/defutil.h index 6b15a909ca3f..b640b6402e99 100644 --- a/lib/zlib_deflate/defutil.h +++ b/lib/zlib_deflate/defutil.h @@ -241,12 +241,21 @@ typedef struct deflate_state { typedef struct deflate_workspace { /* State memory for the deflator */ deflate_state deflate_memory; - Byte window_memory[2 * (1 << MAX_WBITS)]; - Pos prev_memory[1 << MAX_WBITS]; - Pos head_memory[1 << (MAX_MEM_LEVEL + 7)]; - char overlay_memory[(1 << (MAX_MEM_LEVEL + 6)) * (sizeof(ush)+2)]; + Byte *window_memory; + Pos *prev_memory; + Pos *head_memory; + char *overlay_memory; } deflate_workspace; +#define zlib_deflate_window_memsize(windowBits) \ + (2 * (1 << (windowBits)) * sizeof(Byte)) +#define zlib_deflate_prev_memsize(windowBits) \ + ((1 << (windowBits)) * sizeof(Pos)) +#define zlib_deflate_head_memsize(memLevel) \ + ((1 << ((memLevel)+7)) * sizeof(Pos)) +#define zlib_deflate_overlay_memsize(memLevel) \ + ((1 << ((memLevel)+6)) * (sizeof(ush)+2)) + /* Output a byte on the stream. * IN assertion: there is enough room in pending_buf. */ |
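Illustration only, not part of the commit: the xz_lzma2.h comments above fold the two highest bits of a match distance into a six-bit distance slot. A stand-alone sketch of that mapping, assuming the usual LZMA slot formula; dist_slot() and highest_bit() are hypothetical helper names, not code from the patch.

#include <stdint.h>
#include <stdio.h>

#define DIST_MODEL_START 4

/* Index of the highest set bit; dist must be non-zero. */
static unsigned highest_bit(uint32_t dist)
{
	unsigned i = 0;

	while (dist >>= 1)
		i++;
	return i;
}

/*
 * Distances 0-3 are their own slot; larger distances encode the
 * position of the top bit plus the bit just below it, so any 32-bit
 * distance fits in a six-bit slot (0-63).
 */
static uint32_t dist_slot(uint32_t dist)
{
	unsigned top;

	if (dist < DIST_MODEL_START)
		return dist;

	top = highest_bit(dist);
	return 2 * top + ((dist >> (top - 1)) & 1);
}

int main(void)
{
	/* 127 is the last fully modelled distance (slot 13). */
	printf("slot(3)=%u slot(127)=%u slot(128)=%u slot(191)=%u\n",
	       dist_slot(3), dist_slot(127), dist_slot(128), dist_slot(191));
	return 0;
}

Compiled as ordinary user-space C this prints slot 13 for distance 127 and slot 14 for both 128 and 191, matching the DIST_MODEL_END comment in the header.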
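Also illustrative: xz_private.h declares xz_dec_bcj_run() for filter chains that contain a BCJ filter and xz_dec_lzma2_run() for plain LZMA2, and its comments require the BCJ path to be used only when a BCJ filter is actually present. A minimal sketch of that dispatch, assuming it sits inside lib/xz where xz_private.h is visible; dec_block_body() is a made-up name and the real dispatch lives in the stream decoder.

#include "xz_private.h"

/*
 * Run the block body through the BCJ wrapper when a BCJ filter is in
 * the chain, otherwise feed it straight to the LZMA2 decoder.
 */
static enum xz_ret dec_block_body(struct xz_dec_bcj *bcj,
				  struct xz_dec_lzma2 *lzma2,
				  struct xz_buf *b)
{
#ifdef XZ_DEC_BCJ
	if (bcj != NULL)
		return xz_dec_bcj_run(bcj, lzma2, b);
#endif
	return xz_dec_lzma2_run(lzma2, b);
}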
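xz_stream.h above only defines vli_type and VLI_BYTES_MAX; the decoder itself lives elsewhere in lib/xz. Assuming the encoding from the .xz file format specification (seven payload bits per byte, least-significant group first, the 0x80 bit set on every byte except the last), a self-contained sketch of a decoder could look like the following; vli_decode() is a hypothetical name and the rejection of a trailing zero byte is an assumption about non-minimal encodings.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

typedef uint64_t vli_type;
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)	/* 9 for uint64_t */

/*
 * Decode one variable-length integer from buf. Returns the number of
 * bytes consumed, or 0 on error (truncated, too long, or non-minimal).
 */
static size_t vli_decode(const uint8_t *buf, size_t size, vli_type *out)
{
	size_t pos = 0;

	if (size == 0)
		return 0;

	*out = buf[0] & 0x7F;
	while (buf[pos] & 0x80) {
		if (++pos == size || pos == VLI_BYTES_MAX)
			return 0;
		if (buf[pos] == 0)	/* assumed non-minimal check */
			return 0;
		*out |= (vli_type)(buf[pos] & 0x7F) << (pos * 7);
	}

	return pos + 1;
}

int main(void)
{
	const uint8_t enc[] = { 0xC0, 0x02 };	/* 0x40 | (2 << 7) = 320 */
	vli_type v;
	size_t used = vli_decode(enc, sizeof(enc), &v);

	printf("used=%zu value=%llu\n", used, (unsigned long long)v);
	return 0;
}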
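The deflate.c and defutil.h hunks make zlib_deflate_workspacesize() take windowBits and memLevel, so a caller can size the single workspace allocation for its actual parameters instead of the old MAX_WBITS/MAX_MEM_LEVEL worst case; zlib_deflateInit2() then carves the window/prev/head/overlay chunks out of that one block. A hedged sketch of the resulting calling convention; example_deflate_setup() and the error handling are illustrative, not taken from the patch.

#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>

/* Allocate one workspace sized for the requested parameters. */
static int example_deflate_setup(z_stream *strm, int windowBits, int memLevel)
{
	strm->workspace = vmalloc(zlib_deflate_workspacesize(windowBits,
							     memLevel));
	if (strm->workspace == NULL)
		return -ENOMEM;

	if (zlib_deflateInit2(strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
			      windowBits, memLevel,
			      Z_DEFAULT_STRATEGY) != Z_OK) {
		vfree(strm->workspace);
		return -EINVAL;
	}
	return 0;
}

On teardown the caller still calls zlib_deflateEnd() and then frees the workspace itself, as before; note the BUG_ON in the patch limits windowBits to 9-15 and memLevel to 1..MAX_MEM_LEVEL.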