diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 8 | ||||
-rw-r--r-- | lib/Kconfig.debug | 42 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/debugobjects.c | 11 | ||||
-rw-r--r-- | lib/dynamic_debug.c | 190 | ||||
-rw-r--r-- | lib/jedec_ddr_data.c | 135 | ||||
-rw-r--r-- | lib/kobject.c | 14 | ||||
-rw-r--r-- | lib/list_debug.c | 22 | ||||
-rw-r--r-- | lib/raid6/Makefile | 2 | ||||
-rw-r--r-- | lib/raid6/algos.c | 127 | ||||
-rw-r--r-- | lib/raid6/mktables.c | 25 | ||||
-rw-r--r-- | lib/raid6/recov.c | 15 | ||||
-rw-r--r-- | lib/raid6/recov_ssse3.c | 335 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 2 | ||||
-rw-r--r-- | lib/raid6/test/test.c | 32 | ||||
-rw-r--r-- | lib/raid6/x86.h | 15 | ||||
-rw-r--r-- | lib/rational.c | 2 |
17 files changed, 823 insertions, 156 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 4a8aba2e5cc0..0e25c03939e3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -353,6 +353,14 @@ config CORDIC This option provides an implementation of the CORDIC algorithm; calculations are in fixed point. Module will be called cordic. +config DDR + bool "JEDEC DDR data" + help + Data from JEDEC specs for DDR SDRAM memories, + particularly the AC timing parameters and addressing + information. This data is useful for drivers handling + DDR SDRAM controllers. + config MPILIB tristate select CLZ_TAB diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6777153f18f3..a42d3ae39648 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3,12 +3,16 @@ config PRINTK_TIME bool "Show timing information on printks" depends on PRINTK help - Selecting this option causes timing information to be - included in printk output. This allows you to measure - the interval between kernel operations, including bootup - operations. This is useful for identifying long delays - in kernel startup. Or add printk.time=1 at boot-time. - See Documentation/kernel-parameters.txt + Selecting this option causes time stamps of the printk() + messages to be added to the output of the syslog() system + call and at the console. + + The timestamp is always recorded internally, and exported + to /dev/kmsg. This flag just specifies if the timestamp should + be included, not that the timestamp is recorded. + + The behavior is also controlled by the kernel command line + parameter printk.time=1. See Documentation/kernel-parameters.txt config DEFAULT_MESSAGE_LOGLEVEL int "Default message log level (1-7)" @@ -70,6 +74,15 @@ config STRIP_ASM_SYMS that look like '.Lxxx') so they don't pollute the output of get_wchan() and suchlike. +config READABLE_ASM + bool "Generate readable assembler code" + depends on DEBUG_KERNEL + help + Disable some compiler optimizations that tend to generate human unreadable + assembler output. This may make the kernel slightly slower, but it helps + to keep kernel developers who have to stare a lot at assembler listings + sane. + config UNUSED_SYMBOLS bool "Enable unused/obsolete exported symbols" default y if X86 @@ -1205,8 +1218,13 @@ config DYNAMIC_DEBUG otherwise be available at runtime. These messages can then be enabled/disabled based on various levels of scope - per source file, function, module, format string, and line number. This mechanism - implicitly enables all pr_debug() and dev_dbg() calls. The impact of - this compile option is a larger kernel text size of about 2%. + implicitly compiles in all pr_debug() and dev_dbg() calls, which + enlarges the kernel text size by about 2%. + + If a source file is compiled with DEBUG flag set, any + pr_debug() calls in it are enabled by default, but can be + disabled at runtime as below. Note that DEBUG flag is + turned on by many CONFIG_*DEBUG* options. Usage: @@ -1223,16 +1241,16 @@ config DYNAMIC_DEBUG lineno : line number of the debug statement module : module that contains the debug statement function : function that contains the debug statement - flags : 'p' means the line is turned 'on' for printing + flags : '=p' means the line is turned 'on' for printing format : the format used for the debug statement From a live system: nullarbor:~ # cat <debugfs>/dynamic_debug/control # filename:lineno [module]function flags format - fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" - fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" - fs/aio.c:1770 [aio]sys_io_cancel - "calling\040cancel\012" + fs/aio.c:222 [aio]__put_ioctx =_ "__put_ioctx:\040freeing\040%p\012" + fs/aio.c:248 [aio]ioctx_alloc =_ "ENOMEM:\040nr_events\040too\040high\012" + fs/aio.c:1770 [aio]sys_io_cancel =_ "calling\040cancel\012" Example usage: diff --git a/lib/Makefile b/lib/Makefile index 18515f0267c4..74290c9e2864 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o obj-$(CONFIG_CLZ_TAB) += clz_tab.o +obj-$(CONFIG_DDR) += jedec_ddr_data.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 0ab9ae8057f0..d11808ca4bc4 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -79,30 +79,29 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NOTAVAILABLE] = "not available", }; -static int fill_pool(void) +static void fill_pool(void) { gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; struct debug_obj *new; unsigned long flags; if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) - return obj_pool_free; + return; if (unlikely(!obj_cache)) - return obj_pool_free; + return; while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) { new = kmem_cache_zalloc(obj_cache, gfp); if (!new) - return obj_pool_free; + return; raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); obj_pool_free++; raw_spin_unlock_irqrestore(&pool_lock, flags); } - return obj_pool_free; } /* @@ -1052,10 +1051,10 @@ static int __init debug_objects_replace_static_objects(void) cnt++; } } + local_irq_enable(); printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, obj_pool_used); - local_irq_enable(); return 0; free: hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 310c753cf83e..7ca29a0a3019 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -107,20 +107,22 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, return buf; } -#define vpr_info_dq(q, msg) \ -do { \ - if (verbose) \ - /* trim last char off format print */ \ - pr_info("%s: func=\"%s\" file=\"%s\" " \ - "module=\"%s\" format=\"%.*s\" " \ - "lineno=%u-%u", \ - msg, \ - q->function ? q->function : "", \ - q->filename ? q->filename : "", \ - q->module ? q->module : "", \ - (int)(q->format ? strlen(q->format) - 1 : 0), \ - q->format ? q->format : "", \ - q->first_lineno, q->last_lineno); \ +#define vpr_info(fmt, ...) \ + if (verbose) do { pr_info(fmt, ##__VA_ARGS__); } while (0) + +#define vpr_info_dq(q, msg) \ +do { \ + /* trim last char off format print */ \ + vpr_info("%s: func=\"%s\" file=\"%s\" " \ + "module=\"%s\" format=\"%.*s\" " \ + "lineno=%u-%u", \ + msg, \ + q->function ? q->function : "", \ + q->filename ? q->filename : "", \ + q->module ? q->module : "", \ + (int)(q->format ? strlen(q->format) - 1 : 0), \ + q->format ? q->format : "", \ + q->first_lineno, q->last_lineno); \ } while (0) /* @@ -180,12 +182,11 @@ static int ddebug_change(const struct ddebug_query *query, if (newflags == dp->flags) continue; dp->flags = newflags; - if (verbose) - pr_info("changed %s:%d [%s]%s =%s\n", - trim_prefix(dp->filename), dp->lineno, - dt->mod_name, dp->function, - ddebug_describe_flags(dp, flagbuf, - sizeof(flagbuf))); + vpr_info("changed %s:%d [%s]%s =%s\n", + trim_prefix(dp->filename), dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); } } mutex_unlock(&ddebug_lock); @@ -337,7 +338,7 @@ static int check_set(const char **dest, char *src, char *name) * Returns 0 on success, <0 on error. */ static int ddebug_parse_query(char *words[], int nwords, - struct ddebug_query *query) + struct ddebug_query *query, const char *modname) { unsigned int i; int rc; @@ -347,6 +348,10 @@ static int ddebug_parse_query(char *words[], int nwords, return -EINVAL; memset(query, 0, sizeof(*query)); + if (modname) + /* support $modname.dyndbg=<multiple queries> */ + query->module = modname; + for (i = 0 ; i < nwords ; i += 2) { if (!strcmp(words[i], "func")) rc = check_set(&query->function, words[i+1], "func"); @@ -410,8 +415,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, default: return -EINVAL; } - if (verbose) - pr_info("op='%c'\n", op); + vpr_info("op='%c'\n", op); for ( ; *str ; ++str) { for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { @@ -423,8 +427,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, if (i < 0) return -EINVAL; } - if (verbose) - pr_info("flags=0x%x\n", flags); + vpr_info("flags=0x%x\n", flags); /* calculate final *flagsp, *maskp according to mask and op */ switch (op) { @@ -441,12 +444,11 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, *flagsp = 0; break; } - if (verbose) - pr_info("*flagsp=0x%x *maskp=0x%x\n", *flagsp, *maskp); + vpr_info("*flagsp=0x%x *maskp=0x%x\n", *flagsp, *maskp); return 0; } -static int ddebug_exec_query(char *query_string) +static int ddebug_exec_query(char *query_string, const char *modname) { unsigned int flags = 0, mask = 0; struct ddebug_query query; @@ -457,7 +459,7 @@ static int ddebug_exec_query(char *query_string) nwords = ddebug_tokenize(query_string, words, MAXWORDS); if (nwords <= 0) return -EINVAL; - if (ddebug_parse_query(words, nwords-1, &query)) + if (ddebug_parse_query(words, nwords-1, &query, modname)) return -EINVAL; if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) return -EINVAL; @@ -473,7 +475,7 @@ static int ddebug_exec_query(char *query_string) last error or number of matching callsites. Module name is either in param (for boot arg) or perhaps in query string. */ -static int ddebug_exec_queries(char *query) +static int ddebug_exec_queries(char *query, const char *modname) { char *split; int i, errs = 0, exitcode = 0, rc, nfound = 0; @@ -487,10 +489,9 @@ static int ddebug_exec_queries(char *query) if (!query || !*query || *query == '#') continue; - if (verbose) - pr_info("query %d: \"%s\"\n", i, query); + vpr_info("query %d: \"%s\"\n", i, query); - rc = ddebug_exec_query(query); + rc = ddebug_exec_query(query, modname); if (rc < 0) { errs++; exitcode = rc; @@ -498,7 +499,7 @@ static int ddebug_exec_queries(char *query) nfound += rc; i++; } - pr_info("processed %d queries, with %d matches, %d errs\n", + vpr_info("processed %d queries, with %d matches, %d errs\n", i, nfound, errs); if (exitcode) @@ -653,10 +654,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, return -EFAULT; } tmpbuf[len] = '\0'; - if (verbose) - pr_info("read %d bytes from userspace\n", (int)len); + vpr_info("read %d bytes from userspace\n", (int)len); - ret = ddebug_exec_queries(tmpbuf); + ret = ddebug_exec_queries(tmpbuf, NULL); kfree(tmpbuf); if (ret < 0) return ret; @@ -717,8 +717,7 @@ static void *ddebug_proc_start(struct seq_file *m, loff_t *pos) struct _ddebug *dp; int n = *pos; - if (verbose) - pr_info("called m=%p *pos=%lld\n", m, (unsigned long long)*pos); + vpr_info("called m=%p *pos=%lld\n", m, (unsigned long long)*pos); mutex_lock(&ddebug_lock); @@ -742,9 +741,8 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) struct ddebug_iter *iter = m->private; struct _ddebug *dp; - if (verbose) - pr_info("called m=%p p=%p *pos=%lld\n", - m, p, (unsigned long long)*pos); + vpr_info("called m=%p p=%p *pos=%lld\n", + m, p, (unsigned long long)*pos); if (p == SEQ_START_TOKEN) dp = ddebug_iter_first(iter); @@ -766,8 +764,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) struct _ddebug *dp = p; char flagsbuf[10]; - if (verbose) - pr_info("called m=%p p=%p\n", m, p); + vpr_info("called m=%p p=%p\n", m, p); if (p == SEQ_START_TOKEN) { seq_puts(m, @@ -791,8 +788,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) */ static void ddebug_proc_stop(struct seq_file *m, void *p) { - if (verbose) - pr_info("called m=%p p=%p\n", m, p); + vpr_info("called m=%p p=%p\n", m, p); mutex_unlock(&ddebug_lock); } @@ -815,8 +811,7 @@ static int ddebug_proc_open(struct inode *inode, struct file *file) struct ddebug_iter *iter; int err; - if (verbose) - pr_info("called\n"); + vpr_info("called\n"); iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (iter == NULL) @@ -866,12 +861,51 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, list_add_tail(&dt->link, &ddebug_tables); mutex_unlock(&ddebug_lock); - if (verbose) - pr_info("%u debug prints in module %s\n", n, dt->mod_name); + vpr_info("%u debug prints in module %s\n", n, dt->mod_name); return 0; } EXPORT_SYMBOL_GPL(ddebug_add_module); +/* helper for ddebug_dyndbg_(boot|module)_param_cb */ +static int ddebug_dyndbg_param_cb(char *param, char *val, + const char *modname, int on_err) +{ + char *sep; + + sep = strchr(param, '.'); + if (sep) { + /* needed only for ddebug_dyndbg_boot_param_cb */ + *sep = '\0'; + modname = param; + param = sep + 1; + } + if (strcmp(param, "dyndbg")) + return on_err; /* determined by caller */ + + ddebug_exec_queries((val ? val : "+p"), modname); + + return 0; /* query failure shouldnt stop module load */ +} + +/* handle both dyndbg and $module.dyndbg params at boot */ +static int ddebug_dyndbg_boot_param_cb(char *param, char *val, + const char *unused) +{ + vpr_info("%s=\"%s\"\n", param, val); + return ddebug_dyndbg_param_cb(param, val, NULL, 0); +} + +/* + * modprobe foo finds foo.params in boot-args, strips "foo.", and + * passes them to load_module(). This callback gets unknown params, + * processes dyndbg params, rejects others. + */ +int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module) +{ + vpr_info("module: %s %s=\"%s\"\n", module, param, val); + return ddebug_dyndbg_param_cb(param, val, module, -ENOENT); +} + static void ddebug_table_free(struct ddebug_table *dt) { list_del_init(&dt->link); @@ -888,8 +922,7 @@ int ddebug_remove_module(const char *mod_name) struct ddebug_table *dt, *nextdt; int ret = -ENOENT; - if (verbose) - pr_info("removing module \"%s\"\n", mod_name); + vpr_info("removing module \"%s\"\n", mod_name); mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { @@ -940,8 +973,10 @@ static int __init dynamic_debug_init(void) { struct _ddebug *iter, *iter_start; const char *modname = NULL; + char *cmdline; int ret = 0; - int n = 0; + int n = 0, entries = 0, modct = 0; + int verbose_bytes = 0; if (__start___verbose == __stop___verbose) { pr_warn("_ddebug table is empty in a " @@ -952,10 +987,15 @@ static int __init dynamic_debug_init(void) modname = iter->modname; iter_start = iter; for (; iter < __stop___verbose; iter++) { + entries++; + verbose_bytes += strlen(iter->modname) + strlen(iter->function) + + strlen(iter->filename) + strlen(iter->format); + if (strcmp(modname, iter->modname)) { + modct++; ret = ddebug_add_module(iter_start, n, modname); if (ret) - goto out_free; + goto out_err; n = 0; modname = iter->modname; iter_start = iter; @@ -964,29 +1004,45 @@ static int __init dynamic_debug_init(void) } ret = ddebug_add_module(iter_start, n, modname); if (ret) - goto out_free; + goto out_err; + + ddebug_init_success = 1; + vpr_info("%d modules, %d entries and %d bytes in ddebug tables," + " %d bytes in (readonly) verbose section\n", + modct, entries, (int)( modct * sizeof(struct ddebug_table)), + verbose_bytes + (int)(__stop___verbose - __start___verbose)); - /* ddebug_query boot param got passed -> set it up */ + /* apply ddebug_query boot param, dont unload tables on err */ if (ddebug_setup_string[0] != '\0') { - ret = ddebug_exec_queries(ddebug_setup_string); + pr_warn("ddebug_query param name is deprecated," + " change it to dyndbg\n"); + ret = ddebug_exec_queries(ddebug_setup_string, NULL); if (ret < 0) pr_warn("Invalid ddebug boot param %s", ddebug_setup_string); else pr_info("%d changes by ddebug_query\n", ret); - - /* keep tables even on ddebug_query parse error */ - ret = 0; } + /* now that ddebug tables are loaded, process all boot args + * again to find and activate queries given in dyndbg params. + * While this has already been done for known boot params, it + * ignored the unknown ones (dyndbg in particular). Reusing + * parse_args avoids ad-hoc parsing. This will also attempt + * to activate queries for not-yet-loaded modules, which is + * slightly noisy if verbose, but harmless. + */ + cmdline = kstrdup(saved_command_line, GFP_KERNEL); + parse_args("dyndbg params", cmdline, NULL, + 0, 0, 0, &ddebug_dyndbg_boot_param_cb); + kfree(cmdline); + return 0; -out_free: - if (ret) - ddebug_remove_all_tables(); - else - ddebug_init_success = 1; +out_err: + ddebug_remove_all_tables(); return 0; } /* Allow early initialization for boot messages via boot param */ -arch_initcall(dynamic_debug_init); +early_initcall(dynamic_debug_init); + /* Debugfs setup must be done later */ -module_init(dynamic_debug_init_debugfs); +fs_initcall(dynamic_debug_init_debugfs); diff --git a/lib/jedec_ddr_data.c b/lib/jedec_ddr_data.c new file mode 100644 index 000000000000..6d2cbf1d567f --- /dev/null +++ b/lib/jedec_ddr_data.c @@ -0,0 +1,135 @@ +/* + * DDR addressing details and AC timing parameters from JEDEC specs + * + * Copyright (C) 2012 Texas Instruments, Inc. + * + * Aneesh V <aneesh@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <memory/jedec_ddr.h> +#include <linux/module.h> + +/* LPDDR2 addressing details from JESD209-2 section 2.4 */ +const struct lpddr2_addressing + lpddr2_jedec_addressing_table[NUM_DDR_ADDR_TABLE_ENTRIES] = { + {B4, T_REFI_15_6, T_RFC_90}, /* 64M */ + {B4, T_REFI_15_6, T_RFC_90}, /* 128M */ + {B4, T_REFI_7_8, T_RFC_90}, /* 256M */ + {B4, T_REFI_7_8, T_RFC_90}, /* 512M */ + {B8, T_REFI_7_8, T_RFC_130}, /* 1GS4 */ + {B8, T_REFI_3_9, T_RFC_130}, /* 2GS4 */ + {B8, T_REFI_3_9, T_RFC_130}, /* 4G */ + {B8, T_REFI_3_9, T_RFC_210}, /* 8G */ + {B4, T_REFI_7_8, T_RFC_130}, /* 1GS2 */ + {B4, T_REFI_3_9, T_RFC_130}, /* 2GS2 */ +}; +EXPORT_SYMBOL_GPL(lpddr2_jedec_addressing_table); + +/* LPDDR2 AC timing parameters from JESD209-2 section 12 */ +const struct lpddr2_timings + lpddr2_jedec_timings[NUM_DDR_TIMING_TABLE_ENTRIES] = { + /* Speed bin 400(200 MHz) */ + [0] = { + .max_freq = 200000000, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 10000, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 6000, + }, + /* Speed bin 533(266 MHz) */ + [1] = { + .max_freq = 266666666, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 7500, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 6000, + }, + /* Speed bin 800(400 MHz) */ + [2] = { + .max_freq = 400000000, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 7500, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 6000, + }, + /* Speed bin 1066(533 MHz) */ + [3] = { + .max_freq = 533333333, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 7500, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 5620, + }, +}; +EXPORT_SYMBOL_GPL(lpddr2_jedec_timings); + +const struct lpddr2_min_tck lpddr2_jedec_min_tck = { + .tRPab = 3, + .tRCD = 3, + .tWR = 3, + .tRASmin = 3, + .tRRD = 2, + .tWTR = 2, + .tXP = 2, + .tRTP = 2, + .tCKE = 3, + .tCKESR = 3, + .tFAW = 8 +}; +EXPORT_SYMBOL_GPL(lpddr2_jedec_min_tck); diff --git a/lib/kobject.c b/lib/kobject.c index aeefa8bc8b1c..e07ee1fcd6f1 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -47,13 +47,11 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { int error = 0; - if (kobject_name(kobj)) { - error = sysfs_create_dir(kobj); - if (!error) { - error = populate_dir(kobj); - if (error) - sysfs_remove_dir(kobj); - } + error = sysfs_create_dir(kobj); + if (!error) { + error = populate_dir(kobj); + if (error) + sysfs_remove_dir(kobj); } return error; } @@ -634,7 +632,7 @@ struct kobject *kobject_create(void) /** * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs * - * @name: the name for the kset + * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * * This function creates a kobject structure dynamically and registers it diff --git a/lib/list_debug.c b/lib/list_debug.c index 982b850d4e7a..3810b481f940 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -10,6 +10,7 @@ #include <linux/list.h> #include <linux/bug.h> #include <linux/kernel.h> +#include <linux/rculist.h> /* * Insert a new entry between two known consecutive entries. @@ -75,3 +76,24 @@ void list_del(struct list_head *entry) entry->prev = LIST_POISON2; } EXPORT_SYMBOL(list_del); + +/* + * RCU variants. + */ +void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + WARN(next->prev != prev, + "list_add_rcu corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + WARN(prev->next != next, + "list_add_rcu corruption. prev->next should be " + "next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); + new->next = next; + new->prev = prev; + rcu_assign_pointer(list_next_rcu(prev), new); + next->prev = new; +} +EXPORT_SYMBOL(__list_add_rcu); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 8a38102770f3..de06dfe165b8 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o -raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ +raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ altivec8.o mmx.o sse1.o sse2.o hostprogs-y += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 8b02f60ffc86..589f5f50ad2e 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -17,11 +17,11 @@ */ #include <linux/raid/pq.h> -#include <linux/module.h> #ifndef __KERNEL__ #include <sys/mman.h> #include <stdio.h> #else +#include <linux/module.h> #include <linux/gfp.h> #if !RAID6_USE_EMPTY_ZERO_PAGE /* In .bss so it's zeroed */ @@ -34,10 +34,6 @@ struct raid6_calls raid6_call; EXPORT_SYMBOL_GPL(raid6_call); const struct raid6_calls * const raid6_algos[] = { - &raid6_intx1, - &raid6_intx2, - &raid6_intx4, - &raid6_intx8, #if defined(__ia64__) &raid6_intx16, &raid6_intx32, @@ -61,6 +57,24 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec4, &raid6_altivec8, #endif + &raid6_intx1, + &raid6_intx2, + &raid6_intx4, + &raid6_intx8, + NULL +}; + +void (*raid6_2data_recov)(int, size_t, int, int, void **); +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +void (*raid6_datap_recov)(int, size_t, int, void **); +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +const struct raid6_recov_calls *const raid6_recov_algos[] = { +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + &raid6_recov_ssse3, +#endif + &raid6_recov_intx1, NULL }; @@ -72,59 +86,55 @@ const struct raid6_calls * const raid6_algos[] = { #define time_before(x, y) ((x) < (y)) #endif -/* Try to pick the best algorithm */ -/* This code uses the gfmul table as convenient data set to abuse */ - -int __init raid6_select_algo(void) +static inline const struct raid6_recov_calls *raid6_choose_recov(void) { - const struct raid6_calls * const * algo; - const struct raid6_calls * best; - char *syndromes; - void *dptrs[(65536/PAGE_SIZE)+2]; - int i, disks; - unsigned long perf, bestperf; - int bestprefer; - unsigned long j0, j1; + const struct raid6_recov_calls *const *algo; + const struct raid6_recov_calls *best; - disks = (65536/PAGE_SIZE)+2; - for ( i = 0 ; i < disks-2 ; i++ ) { - dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; - } + for (best = NULL, algo = raid6_recov_algos; *algo; algo++) + if (!best || (*algo)->priority > best->priority) + if (!(*algo)->valid || (*algo)->valid()) + best = *algo; - /* Normal code - use a 2-page allocation to avoid D$ conflict */ - syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + if (best) { + raid6_2data_recov = best->data2; + raid6_datap_recov = best->datap; - if ( !syndromes ) { - printk("raid6: Yikes! No memory available.\n"); - return -ENOMEM; - } + printk("raid6: using %s recovery algorithm\n", best->name); + } else + printk("raid6: Yikes! No recovery algorithm found!\n"); - dptrs[disks-2] = syndromes; - dptrs[disks-1] = syndromes + PAGE_SIZE; + return best; +} + +static inline const struct raid6_calls *raid6_choose_gen( + void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) +{ + unsigned long perf, bestperf, j0, j1; + const struct raid6_calls *const *algo; + const struct raid6_calls *best; - bestperf = 0; bestprefer = 0; best = NULL; + for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + if (!best || (*algo)->prefer >= best->prefer) { + if ((*algo)->valid && !(*algo)->valid()) + continue; - for ( algo = raid6_algos ; *algo ; algo++ ) { - if ( !(*algo)->valid || (*algo)->valid() ) { perf = 0; preempt_disable(); j0 = jiffies; - while ( (j1 = jiffies) == j0 ) + while ((j1 = jiffies) == j0) cpu_relax(); while (time_before(jiffies, j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { - (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); + (*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs); perf++; } preempt_enable(); - if ( (*algo)->prefer > bestprefer || - ((*algo)->prefer == bestprefer && - perf > bestperf) ) { - best = *algo; - bestprefer = best->prefer; + if (perf > bestperf) { bestperf = perf; + best = *algo; } printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); @@ -139,9 +149,46 @@ int __init raid6_select_algo(void) } else printk("raid6: Yikes! No algorithm found!\n"); + return best; +} + + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ + const int disks = (65536/PAGE_SIZE)+2; + + const struct raid6_calls *gen_best; + const struct raid6_recov_calls *rec_best; + char *syndromes; + void *dptrs[(65536/PAGE_SIZE)+2]; + int i; + + for (i = 0; i < disks-2; i++) + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + + /* Normal code - use a 2-page allocation to avoid D$ conflict */ + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + + if (!syndromes) { + printk("raid6: Yikes! No memory available.\n"); + return -ENOMEM; + } + + dptrs[disks-2] = syndromes; + dptrs[disks-1] = syndromes + PAGE_SIZE; + + /* select raid gen_syndrome function */ + gen_best = raid6_choose_gen(&dptrs, disks); + + /* select raid recover functions */ + rec_best = raid6_choose_recov(); + free_pages((unsigned long)syndromes, 1); - return best ? 0 : -EINVAL; + return gen_best && rec_best ? 0 : -EINVAL; } static void raid6_exit(void) diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 8a3780902cec..39787db588b0 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -81,6 +81,31 @@ int main(int argc, char *argv[]) printf("EXPORT_SYMBOL(raid6_gfmul);\n"); printf("#endif\n"); + /* Compute vector multiplication table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_vgfmul[256][32] =\n" + "{\n"); + for (i = 0; i < 256; i++) { + printf("\t{\n"); + for (j = 0; j < 16; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, j + k), + (k == 7) ? '\n' : ' '); + } + for (j = 0; j < 16; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, (j + k) << 4), + (k == 7) ? '\n' : ' '); + } + printf("\t},\n"); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_vgfmul);\n"); + printf("#endif\n"); + /* Compute power-of-2 table (exponent) */ v = 1; printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index fe275d7b6b36..1805a5cc5daa 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,7 +22,7 @@ #include <linux/raid/pq.h> /* Recover two failed data blocks. */ -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, void **ptrs) { u8 *p, *q, *dp, *dq; @@ -64,10 +64,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, p++; q++; } } -EXPORT_SYMBOL_GPL(raid6_2data_recov); /* Recover failure of one data block plus the P block */ -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ @@ -96,7 +95,15 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) q++; dq++; } } -EXPORT_SYMBOL_GPL(raid6_datap_recov); + + +const struct raid6_recov_calls raid6_recov_intx1 = { + .data2 = raid6_2data_recov_intx1, + .datap = raid6_datap_recov_intx1, + .valid = NULL, + .name = "intx1", + .priority = 0, +}; #ifndef __KERNEL__ /* Testing only */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c new file mode 100644 index 000000000000..37ae61930559 --- /dev/null +++ b/lib/raid6/recov_ssse3.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +static int raid6_has_ssse3(void) +{ + return boot_cpu_has(X86_FEATURE_XMM) && + boot_cpu_has(X86_FEATURE_XMM2) && + boot_cpu_has(X86_FEATURE_SSSE3); +} + +void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, + void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + static const u8 __aligned(16) x0f[16] = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0])); + +#ifdef CONFIG_X86_64 + asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0])); + asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0])); + asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16])); +#endif + + /* Now do it... */ + while (bytes) { +#ifdef CONFIG_X86_64 + /* xmm6, xmm14, xmm15 */ + + asm volatile("movdqa %0,%%xmm1" : : "m" (q[0])); + asm volatile("movdqa %0,%%xmm9" : : "m" (q[16])); + asm volatile("movdqa %0,%%xmm0" : : "m" (p[0])); + asm volatile("movdqa %0,%%xmm8" : : "m" (p[16])); + asm volatile("pxor %0,%%xmm1" : : "m" (dq[0])); + asm volatile("pxor %0,%%xmm9" : : "m" (dq[16])); + asm volatile("pxor %0,%%xmm0" : : "m" (dp[0])); + asm volatile("pxor %0,%%xmm8" : : "m" (dp[16])); + + /* xmm0/8 = px */ + + asm volatile("movdqa %xmm6,%xmm4"); + asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + asm volatile("movdqa %xmm6,%xmm12"); + asm volatile("movdqa %xmm5,%xmm13"); + asm volatile("movdqa %xmm1,%xmm3"); + asm volatile("movdqa %xmm9,%xmm11"); + asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */ + asm volatile("movdqa %xmm8,%xmm10"); + asm volatile("psraw $4,%xmm1"); + asm volatile("psraw $4,%xmm9"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm11"); + asm volatile("pand %xmm7,%xmm1"); + asm volatile("pand %xmm7,%xmm9"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm11,%xmm12"); + asm volatile("pshufb %xmm1,%xmm5"); + asm volatile("pshufb %xmm9,%xmm13"); + asm volatile("pxor %xmm4,%xmm5"); + asm volatile("pxor %xmm12,%xmm13"); + + /* xmm5/13 = qx */ + + asm volatile("movdqa %xmm14,%xmm4"); + asm volatile("movdqa %xmm15,%xmm1"); + asm volatile("movdqa %xmm14,%xmm12"); + asm volatile("movdqa %xmm15,%xmm9"); + asm volatile("movdqa %xmm2,%xmm3"); + asm volatile("movdqa %xmm10,%xmm11"); + asm volatile("psraw $4,%xmm2"); + asm volatile("psraw $4,%xmm10"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm11"); + asm volatile("pand %xmm7,%xmm2"); + asm volatile("pand %xmm7,%xmm10"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm11,%xmm12"); + asm volatile("pshufb %xmm2,%xmm1"); + asm volatile("pshufb %xmm10,%xmm9"); + asm volatile("pxor %xmm4,%xmm1"); + asm volatile("pxor %xmm12,%xmm9"); + + /* xmm1/9 = pbmul[px] */ + asm volatile("pxor %xmm5,%xmm1"); + asm volatile("pxor %xmm13,%xmm9"); + /* xmm1/9 = db = DQ */ + asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16])); + + asm volatile("pxor %xmm1,%xmm0"); + asm volatile("pxor %xmm9,%xmm8"); + asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0])); + asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16])); + + bytes -= 32; + p += 32; + q += 32; + dp += 32; + dq += 32; +#else + asm volatile("movdqa %0,%%xmm1" : : "m" (*q)); + asm volatile("movdqa %0,%%xmm0" : : "m" (*p)); + asm volatile("pxor %0,%%xmm1" : : "m" (*dq)); + asm volatile("pxor %0,%%xmm0" : : "m" (*dp)); + + /* 1 = dq ^ q + * 0 = dp ^ p + */ + asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0])); + asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + + asm volatile("movdqa %xmm1,%xmm3"); + asm volatile("psraw $4,%xmm1"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm1"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm1,%xmm5"); + asm volatile("pxor %xmm4,%xmm5"); + + asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */ + + /* xmm5 = qx */ + + asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0])); + asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16])); + asm volatile("movdqa %xmm2,%xmm3"); + asm volatile("psraw $4,%xmm2"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm2"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm2,%xmm1"); + asm volatile("pxor %xmm4,%xmm1"); + + /* xmm1 = pbmul[px] */ + asm volatile("pxor %xmm5,%xmm1"); + /* xmm1 = db = DQ */ + asm volatile("movdqa %%xmm1,%0" : "=m" (*dq)); + + asm volatile("pxor %xmm1,%xmm0"); + asm volatile("movdqa %%xmm0,%0" : "=m" (*dp)); + + bytes -= 16; + p += 16; + q += 16; + dp += 16; + dq += 16; +#endif + } + + kernel_fpu_end(); +} + + +void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + static const u8 __aligned(16) x0f[16] = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0])); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); + asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16])); + asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); + asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + + /* xmm3 = q[0] ^ dq[0] */ + + asm volatile("pxor %0, %%xmm4" : : "m" (q[16])); + asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + + /* xmm4 = q[16] ^ dq[16] */ + + asm volatile("movdqa %xmm3, %xmm6"); + asm volatile("movdqa %xmm4, %xmm8"); + + /* xmm4 = xmm8 = q[16] ^ dq[16] */ + + asm volatile("psraw $4, %xmm3"); + asm volatile("pand %xmm7, %xmm6"); + asm volatile("pand %xmm7, %xmm3"); + asm volatile("pshufb %xmm6, %xmm0"); + asm volatile("pshufb %xmm3, %xmm1"); + asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0])); + asm volatile("pxor %xmm0, %xmm1"); + asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16])); + + /* xmm1 = qmul[q[0] ^ dq[0]] */ + + asm volatile("psraw $4, %xmm4"); + asm volatile("pand %xmm7, %xmm8"); + asm volatile("pand %xmm7, %xmm4"); + asm volatile("pshufb %xmm8, %xmm10"); + asm volatile("pshufb %xmm4, %xmm11"); + asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); + asm volatile("pxor %xmm10, %xmm11"); + asm volatile("movdqa %0, %%xmm12" : : "m" (p[16])); + + /* xmm11 = qmul[q[16] ^ dq[16]] */ + + asm volatile("pxor %xmm1, %xmm2"); + + /* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */ + + asm volatile("pxor %xmm11, %xmm12"); + + /* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */ + + asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16])); + + asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + asm volatile("movdqa %%xmm12, %0" : "=m" (p[16])); + + bytes -= 32; + p += 32; + q += 32; + dq += 32; + +#else + asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); + asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); + asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + + /* xmm3 = *q ^ *dq */ + + asm volatile("movdqa %xmm3, %xmm6"); + asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); + asm volatile("psraw $4, %xmm3"); + asm volatile("pand %xmm7, %xmm6"); + asm volatile("pand %xmm7, %xmm3"); + asm volatile("pshufb %xmm6, %xmm0"); + asm volatile("pshufb %xmm3, %xmm1"); + asm volatile("pxor %xmm0, %xmm1"); + + /* xmm1 = qmul[*q ^ *dq */ + + asm volatile("pxor %xmm1, %xmm2"); + + /* xmm2 = *p ^ qmul[*q ^ *dq] */ + + asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + + bytes -= 16; + p += 16; + q += 16; + dq += 16; +#endif + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_ssse3 = { + .data2 = raid6_2data_recov_ssse3, + .datap = raid6_datap_recov_ssse3, + .valid = raid6_has_ssse3, +#ifdef CONFIG_X86_64 + .name = "ssse3x2", +#else + .name = "ssse3x1", +#endif + .priority = 1, +}; + +#endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index aa651697b6dc..c76151d94764 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -23,7 +23,7 @@ RANLIB = ranlib all: raid6.a raid6test raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ - altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ + altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \ tables.o rm -f $@ $(AR) cq $@ $^ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 7a930318b17d..5a485b7a7d3c 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -90,25 +90,35 @@ static int test_disks(int i, int j) int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; + const struct raid6_recov_calls *const *ra; int i, j; int err = 0; makedata(); - for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + for (ra = raid6_recov_algos; *ra; ra++) { + if ((*ra)->valid && !(*ra)->valid()) + continue; + raid6_2data_recov = (*ra)->data2; + raid6_datap_recov = (*ra)->datap; - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + printf("using recovery %s\n", (*ra)->name); - /* Generate assumed good syndrome */ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (algo = raid6_algos; *algo; algo++) { + if (!(*algo)->valid || (*algo)->valid()) { + raid6_call = **algo; - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } } printf("\n"); } diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index cb2a8c91c886..d55d63232c55 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -35,24 +35,29 @@ static inline void kernel_fpu_end(void) { } +#define __aligned(x) __attribute__((aligned(x))) + #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions * (fast save and restore) */ #define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ #define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ /* Should work well enough on modern CPUs for testing */ static inline int boot_cpu_has(int flag) { - u32 eax = (flag >> 5) ? 0x80000001 : 1; - u32 edx; + u32 eax = (flag & 0x20) ? 0x80000001 : 1; + u32 ecx, edx; asm volatile("cpuid" - : "+a" (eax), "=d" (edx) - : : "ecx", "ebx"); + : "+a" (eax), "=d" (edx), "=c" (ecx) + : : "ebx"); - return (edx >> (flag & 31)) & 1; + return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; } #endif /* ndef __KERNEL__ */ diff --git a/lib/rational.c b/lib/rational.c index d326da3976f5..f0aa21c2a762 100644 --- a/lib/rational.c +++ b/lib/rational.c @@ -1,7 +1,7 @@ /* * rational fractions * - * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <oskar@scara.com> * * helper functions when coping with rational numbers */ |