From 7d43c2e42cb1e436f97c1763150e4e1122ae0d57 Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Wed, 30 May 2012 14:19:55 -0600
Subject: iommu: Remove group_mf

The iommu=group_mf option is really no longer needed with the addition of
ACS support in IOMMU drivers creating groups.  Most multifunction devices
will now be grouped already.  If a device has gone to the trouble of
exposing ACS, trust that it works.  We can use the device specific ACS
function for fixing devices we trust individually.  This largely reverts
bcb71abe.

Signed-off-by: Alex Williamson
Signed-off-by: Joerg Roedel
---
 Documentation/kernel-parameters.txt | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation/kernel-parameters.txt')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..d2f4f7acc435 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1134,7 +1134,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		forcesac
 		soft
 		pt		[x86, IA-64]
-		group_mf	[x86, IA-64]
 
 	io7=		[HW] IO7 for Marvel based alpha systems
--
cgit v1.2.3

From f885b7f2b2de70be266d2cecc476f773a1e2ca5d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 23 Apr 2012 15:52:53 -0700
Subject: rcu: Control RCU_FANOUT_LEAF from boot-time parameter

Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
than a fixed constant makes it easier for people to decrease cache-miss
overhead for large systems, it is of little help for people who must run
a single pre-built kernel binary.

This commit therefore allows the value of RCU_FANOUT_LEAF to be increased
(but not decreased!) via a boot-time parameter named
rcutree.rcu_fanout_leaf.

Reported-by: Mike Galbraith
Signed-off-by: Paul E. McKenney
---
 Documentation/kernel-parameters.txt |  5 ++
 kernel/rcutree.c                    | 97 +++++++++++++++++++++++++++++++------
 kernel/rcutree.h                    | 23 +++++----
 kernel/rcutree_plugin.h             |  2 +
 kernel/rcutree_trace.c              |  2 +-
 5 files changed, 104 insertions(+), 25 deletions(-)

(limited to 'Documentation/kernel-parameters.txt')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..12783fa833c3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set maximum number of finished RCU callbacks to process
 			in one batch.
 
+	rcutree.fanout_leaf=	[KNL,BOOT]
+			Increase the number of CPUs assigned to each
+			leaf rcu_node structure.  Useful for very large
+			systems.
+
 	rcutree.qhimark=	[KNL,BOOT]
 			Set threshold of queued RCU callbacks over which
 			batch limiting is disabled.
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..a4c592b66e10 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,17 +60,10 @@
 
 /* Data structures. */
 
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
 #define RCU_STATE_INITIALIZER(structname) { \
 	.level = { &structname##_state.node[0] }, \
-	.levelcnt = { \
-		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
-		NUM_RCU_LVL_1, \
-		NUM_RCU_LVL_2, \
-		NUM_RCU_LVL_3, \
-		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
-	}, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
@@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
 
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time.
*/ +static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +module_param(rcu_fanout_leaf, int, 0); +int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; +static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ + NUM_RCU_LVL_0, + NUM_RCU_LVL_1, + NUM_RCU_LVL_2, + NUM_RCU_LVL_3, + NUM_RCU_LVL_4, +}; +int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ + /* * The rcu_scheduler_active variable transitions from zero to one just * before the first task is spawned. So when this variable is zero, RCU @@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - for (i = NUM_RCU_LVLS - 1; i > 0; i--) + for (i = rcu_num_lvls - 1; i > 0; i--) rsp->levelspread[i] = CONFIG_RCU_FANOUT; - rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; + rsp->levelspread[0] = rcu_fanout_leaf; } #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ static void __init rcu_init_levelspread(struct rcu_state *rsp) @@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) int i; cprv = NR_CPUS; - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { ccur = rsp->levelcnt[i]; rsp->levelspread[i] = (cprv + ccur - 1) / ccur; cprv = ccur; @@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* Initialize the level-tracking arrays. */ - for (i = 1; i < NUM_RCU_LVLS; i++) + for (i = 0; i < rcu_num_lvls; i++) + rsp->levelcnt[i] = num_rcu_lvl[i]; + for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); /* Initialize the elements themselves, starting from the leaves. */ - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { @@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rsp->rda = rda; - rnp = rsp->level[NUM_RCU_LVLS - 1]; + rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) rnp++; @@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp, } } +/* + * Compute the rcu_node tree geometry from kernel parameters. This cannot + * replace the definitions in rcutree.h because those are needed to size + * the ->node array in the rcu_state structure. + */ +static void __init rcu_init_geometry(void) +{ + int i; + int j; + int n = NR_CPUS; + int rcu_capacity[MAX_RCU_LVLS + 1]; + + /* If the compile-time values are accurate, just leave. */ + if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) + return; + + /* + * Compute number of nodes that can be handled an rcu_node tree + * with the given number of levels. Setting rcu_capacity[0] makes + * some of the arithmetic easier. + */ + rcu_capacity[0] = 1; + rcu_capacity[1] = rcu_fanout_leaf; + for (i = 2; i <= MAX_RCU_LVLS; i++) + rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + + /* + * The boot-time rcu_fanout_leaf parameter is only permitted + * to increase the leaf-level fanout, not decrease it. Of course, + * the leaf-level fanout cannot exceed the number of bits in + * the rcu_node masks. Finally, the tree must be able to accommodate + * the configured number of CPUs. Complain and fall back to the + * compile-time values if these limits are exceeded. 
+ */ + if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + rcu_fanout_leaf > sizeof(unsigned long) * 8 || + n > rcu_capacity[MAX_RCU_LVLS]) { + WARN_ON(1); + return; + } + + /* Calculate the number of rcu_nodes at each level of the tree. */ + for (i = 1; i <= MAX_RCU_LVLS; i++) + if (n <= rcu_capacity[i]) { + for (j = 0; j <= i; j++) + num_rcu_lvl[j] = + DIV_ROUND_UP(n, rcu_capacity[i - j]); + rcu_num_lvls = i; + for (j = i + 1; j <= MAX_RCU_LVLS; j++) + num_rcu_lvl[j] = 0; + break; + } + + /* Calculate the total number of rcu_node structures. */ + rcu_num_nodes = 0; + for (i = 0; i <= MAX_RCU_LVLS; i++) + rcu_num_nodes += num_rcu_lvl[i]; + rcu_num_nodes -= n; +} + void __init rcu_init(void) { int cpu; rcu_bootup_announce(); + rcu_init_geometry(); rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..780a0195d35a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -42,28 +42,28 @@ #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 -# define NUM_RCU_LVLS 1 +# define RCU_NUM_LVLS 1 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_2 -# define NUM_RCU_LVLS 2 +# define RCU_NUM_LVLS 2 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_3 -# define NUM_RCU_LVLS 3 +# define RCU_NUM_LVLS 3 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_3 (NR_CPUS) # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_4 -# define NUM_RCU_LVLS 4 +# define RCU_NUM_LVLS 4 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) @@ -76,6 +76,9 @@ #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) +extern int rcu_num_lvls; +extern int rcu_num_nodes; + /* * Dynticks per-CPU state. */ @@ -206,7 +209,7 @@ struct rcu_node { */ #define rcu_for_each_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* * Do a breadth-first scan of the non-leaf rcu_node structures for the @@ -215,7 +218,7 @@ struct rcu_node { */ #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) + (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) /* * Scan the leaves of the rcu_node hierarchy for the specified rcu_state @@ -224,8 +227,8 @@ struct rcu_node { * It is still a leaf node, even if it is also the root node. */ #define rcu_for_each_leaf_node(rsp, rnp) \ - for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* Index values for nxttail array in struct rcu_data. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ @@ -357,9 +360,9 @@ do { \ */ struct rcu_state { struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. 
 */
+	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */
 	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
-	u8 levelspread[NUM_RCU_LVLS];		/* kids/node in each level. */
+	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 
 	/* The following fields are guarded by the root rcu_node's lock. */
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3e4899459f3d..fb92b6dd9980 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -70,6 +70,8 @@ static void __init rcu_bootup_announce_oddness(void)
 #if NUM_RCU_LVL_4 != 0
 	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
 #endif
+	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
+		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 }
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d4bc16ddd1d4..a3556a2d842c 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -278,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
 		   rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
-	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
+	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");
 			level = rnp->level;
--
cgit v1.2.3

From eaa05dfcdb12cf3a7bedf8918dc8699c00944384 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sat, 7 Jul 2012 23:05:28 -0400
Subject: [SCSI] usb-storage: add support for write cache quirk

Add support for a write cache quirk on USB hard disks.  When a plugged-in
USB HDD has the write cache quirk set in the quirk list, the SCSI driver
marks the device's write cache as enabled (WCE) by default.

Signed-off-by: Namjae Jeon
Signed-off-by: Pankaj Kumar
Signed-off-by: Amit Sahrawat
Acked-by: Alan Stern
Signed-off-by: James Bottomley
---
 Documentation/kernel-parameters.txt | 2 ++
 drivers/usb/storage/scsiglue.c      | 5 +++++
 drivers/usb/storage/usb.c           | 5 ++++-
 include/linux/usb_usual.h           | 4 +++-
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'Documentation/kernel-parameters.txt')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..c68634edd451 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2932,6 +2932,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					initial READ(10) command);
 				o = CAPACITY_OK (accept the capacity
 					reported by the device);
+				p = WRITE_CACHE (the device cache is ON
+					by default);
 				r = IGNORE_RESIDUE (the device reports
 					bogus residue values);
 				s = SINGLE_LUN (the device has only one
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 11418da9bc09..a3d54366afcc 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -236,6 +236,11 @@ static int slave_configure(struct scsi_device *sdev)
 				US_FL_SCM_MULT_TARG)) &&
 				us->protocol == USB_PR_BULK)
 			us->use_last_sector_hacks = 1;
+
+		/* Check if write cache default on flag is set or not */
+		if (us->fflags & US_FL_WRITE_CACHE)
+			sdev->wce_default_on = 1;
+
 	} else {
 
 		/* Non-disk-type devices don't need to blacklist any pages
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index e23c30ab66da..d012fe4329e7 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -473,7 +473,7 @@ static void adjust_quirks(struct us_data *us)
 			US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE |
 			US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT |
 			US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 |
-			US_FL_INITIAL_READ10);
+			US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE);
 
 	p = quirks;
 	while (*p) {
@@ -529,6 +529,9 @@ static void adjust_quirks(struct us_data *us)
 		case 'o':
 			f |= US_FL_CAPACITY_OK;
 			break;
+		case 'p':
+			f |= US_FL_WRITE_CACHE;
+			break;
 		case 'r':
 			f |= US_FL_IGNORE_RESIDUE;
 			break;
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 17df3600bcef..e84e769aaddc 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -64,7 +64,9 @@
 	US_FLAG(NO_READ_CAPACITY_16,	0x00080000)		\
 		/* cannot handle READ_CAPACITY_16 */		\
 	US_FLAG(INITIAL_READ10,	0x00100000)			\
-		/* Initial READ(10) (and others) must be retried */
+		/* Initial READ(10) (and others) must be retried */	\
+	US_FLAG(WRITE_CACHE,	0x00200000)			\
+		/* Write Cache status is not available */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
--
cgit v1.2.3

From e9da6e9905e639b0f842a244bc770b48ad0523e9 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Mon, 30 Jul 2012 09:11:33 +0200
Subject: ARM: dma-mapping: remove custom consistent dma region

This patch changes the dma-mapping subsystem to use generic vmalloc
areas for all consistent dma allocations.  This increases the total size
limit of the consistent allocations and removes platform hacks and a lot
of duplicated code.

Atomic allocations are served from a special pool preallocated at boot,
because vmalloc areas cannot be reliably created in atomic context.

Signed-off-by: Marek Szyprowski
Reviewed-by: Kyungmin Park
Reviewed-by: Minchan Kim
---
 Documentation/kernel-parameters.txt |   2 +-
 arch/arm/include/asm/dma-mapping.h  |   2 +-
 arch/arm/mm/dma-mapping.c           | 486 ++++++++++++------------------------
 arch/arm/mm/mm.h                    |   3 +
 include/linux/vmalloc.h             |   1 +
 mm/vmalloc.c                        |  10 +-
 6 files changed, 181 insertions(+), 323 deletions(-)

(limited to 'Documentation/kernel-parameters.txt')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..4ee28f32b909 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -526,7 +526,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	coherent_pool=nn[KMG]	[ARM,KNL]
 			Sets the size of memory pool for coherent, atomic dma
-			allocations if Contiguous Memory Allocator (CMA) is used.
+			allocations, by default set to 256K.
code_bytes [X86] How many bytes of object code to print in an oops report. diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index bbef15d04890..80777d871422 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -226,7 +226,7 @@ static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struc * DMA region above it's default value of 2MB. It must be called before the * memory allocator is initialised, i.e. before any core_initcall. */ -extern void __init init_consistent_dma_size(unsigned long size); +static inline void init_consistent_dma_size(unsigned long size) { } /* * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 655878bcc96d..f906d5f4cbd8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -217,115 +218,70 @@ static void __dma_free_buffer(struct page *page, size_t size) } #ifdef CONFIG_MMU +#ifdef CONFIG_HUGETLB_PAGE +#error ARM Coherent DMA allocator does not (yet) support huge TLB +#endif -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) - -/* - * These are the page tables (2MB each) covering uncached, DMA consistent allocations - */ -static pte_t **consistent_pte; - -#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page); -static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller); -void __init init_consistent_dma_size(unsigned long size) +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) { - unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); + struct vm_struct *area; + unsigned long addr; - BUG_ON(consistent_pte); /* Check we're called before DMA region init */ - BUG_ON(base < VMALLOC_END); + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. + */ + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); - /* Grow region to accommodate specified size */ - if (base < consistent_base) - consistent_base = base; + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr; } -#include "vmregion.h" - -static struct arm_vmregion_head consistent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_end = CONSISTENT_END, -}; - -#ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB -#endif - -/* - * Initialise the consistent memory allocation. 
- */ -static int __init consistent_init(void) +static void __dma_free_remap(void *cpu_addr, size_t size) { - int ret = 0; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i = 0; - unsigned long base = consistent_base; - unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; - - if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) - return 0; - - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); - if (!consistent_pte) { - pr_err("%s: no memory\n", __func__); - return -ENOMEM; + unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; + struct vm_struct *area = find_vm_area(cpu_addr); + if (!area || (area->flags & flags) != flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; } - - pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); - consistent_head.vm_start = base; - - do { - pgd = pgd_offset(&init_mm, base); - - pud = pud_alloc(&init_mm, pgd, base); - if (!pud) { - pr_err("%s: no pud tables\n", __func__); - ret = -ENOMEM; - break; - } - - pmd = pmd_alloc(&init_mm, pud, base); - if (!pmd) { - pr_err("%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - pr_err("%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte[i++] = pte; - base += PMD_SIZE; - } while (base < CONSISTENT_END); - - return ret; + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); } -core_initcall(consistent_init); - -static void *__alloc_from_contiguous(struct device *dev, size_t size, - pgprot_t prot, struct page **ret_page); -static struct arm_vmregion_head coherent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), +struct dma_pool { + size_t size; + spinlock_t lock; + unsigned long *bitmap; + unsigned long nr_pages; + void *vaddr; + struct page *page; }; -static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; +static struct dma_pool atomic_pool = { + .size = SZ_256K, +}; static int __init early_coherent_pool(char *p) { - coherent_pool_size = memparse(p, &p); + atomic_pool.size = memparse(p, &p); return 0; } early_param("coherent_pool", early_coherent_pool); @@ -333,32 +289,45 @@ early_param("coherent_pool", early_coherent_pool); /* * Initialise the coherent pool for atomic allocations. 
*/ -static int __init coherent_init(void) +static int __init atomic_pool_init(void) { + struct dma_pool *pool = &atomic_pool; pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); - size_t size = coherent_pool_size; + unsigned long nr_pages = pool->size >> PAGE_SHIFT; + unsigned long *bitmap; struct page *page; void *ptr; + int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); - if (!IS_ENABLED(CONFIG_CMA)) - return 0; + bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!bitmap) + goto no_bitmap; - ptr = __alloc_from_contiguous(NULL, size, prot, &page); + if (IS_ENABLED(CONFIG_CMA)) + ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page); + else + ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot, + &page, NULL); if (ptr) { - coherent_head.vm_start = (unsigned long) ptr; - coherent_head.vm_end = (unsigned long) ptr + size; - printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", - (unsigned)size / 1024); + spin_lock_init(&pool->lock); + pool->vaddr = ptr; + pool->page = page; + pool->bitmap = bitmap; + pool->nr_pages = nr_pages; + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)pool->size / 1024); return 0; } - printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", - (unsigned)size / 1024); + kfree(bitmap); +no_bitmap: + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)pool->size / 1024); return -ENOMEM; } /* * CMA is activated by core_initcall, so we must be called after it. */ -postcore_initcall(coherent_init); +postcore_initcall(atomic_pool_init); struct dma_contig_early_reserve { phys_addr_t base; @@ -406,112 +375,6 @@ void __init dma_contiguous_remap(void) } } -static void * -__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, - const void *caller) -{ - struct arm_vmregion *c; - size_t align; - int bit; - - if (!consistent_pte) { - pr_err("%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; - - /* - * Allocate a virtual address in the consistent mapping region. 
- */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->priv = page; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(page, prot), 0); - page++; - pte++; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (size -= PAGE_SIZE); - - dsb(); - - return (void *)c->vm_start; - } - return NULL; -} - -static void __dma_free_remap(void *cpu_addr, size_t size) -{ - struct arm_vmregion *c; - unsigned long addr; - pte_t *ptep; - int idx; - u32 off; - - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); - if (!c) { - pr_err("%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); - return; - } - - if ((c->vm_end - c->vm_start) != size) { - pr_err("%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - - ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } - - if (pte_none(pte) || !pte_present(pte)) - pr_crit("%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - arm_vmregion_free(&consistent_head, c); -} - static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, void *data) { @@ -552,16 +415,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, return ptr; } -static void *__alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, const void *caller) +static void *__alloc_from_pool(size_t size, struct page **ret_page) { - struct arm_vmregion *c; + struct dma_pool *pool = &atomic_pool; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int pageno; + unsigned long flags; + void *ptr = NULL; size_t align; - if (!coherent_head.vm_start) { - printk(KERN_ERR "%s: coherent pool not initialised!\n", - __func__); - dump_stack(); + if (!pool->vaddr) { + WARN(1, "coherent pool not initialised!\n"); return NULL; } @@ -571,35 +435,41 @@ static void *__alloc_from_pool(struct device *dev, size_t size, * size. This helps reduce fragmentation of the DMA space. 
*/ align = PAGE_SIZE << get_order(size); - c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); - if (c) { - void *ptr = (void *)c->vm_start; - struct page *page = virt_to_page(ptr); - *ret_page = page; - return ptr; + + spin_lock_irqsave(&pool->lock, flags); + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, + 0, count, (1 << align) - 1); + if (pageno < pool->nr_pages) { + bitmap_set(pool->bitmap, pageno, count); + ptr = pool->vaddr + PAGE_SIZE * pageno; + *ret_page = pool->page + pageno; } - return NULL; + spin_unlock_irqrestore(&pool->lock, flags); + + return ptr; } -static int __free_from_pool(void *cpu_addr, size_t size) +static int __free_from_pool(void *start, size_t size) { - unsigned long start = (unsigned long)cpu_addr; - unsigned long end = start + size; - struct arm_vmregion *c; + struct dma_pool *pool = &atomic_pool; + unsigned long pageno, count; + unsigned long flags; - if (start < coherent_head.vm_start || end > coherent_head.vm_end) + if (start < pool->vaddr || start > pool->vaddr + pool->size) return 0; - c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; + if (start + size > pool->vaddr + pool->size) { + WARN(1, "freeing wrong coherent size from pool\n"); + return 0; } - arm_vmregion_free(&coherent_head, c); + pageno = (start - pool->vaddr) >> PAGE_SHIFT; + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->bitmap, pageno, count); + spin_unlock_irqrestore(&pool->lock, flags); + return 1; } @@ -644,7 +514,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __get_dma_pgprot(attrs, prot) __pgprot(0) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL -#define __alloc_from_pool(dev, size, ret_page, c) NULL +#define __alloc_from_pool(size, ret_page) NULL #define __alloc_from_contiguous(dev, size, prot, ret) NULL #define __free_from_pool(cpu_addr, size) 0 #define __free_from_contiguous(dev, page, size) do { } while (0) @@ -702,10 +572,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (arch_is_coherent() || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (gfp & GFP_ATOMIC) + addr = __alloc_from_pool(size, &page); else if (!IS_ENABLED(CONFIG_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); - else if (gfp & GFP_ATOMIC) - addr = __alloc_from_pool(dev, size, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page); @@ -998,9 +868,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask) static int __init dma_debug_do_init(void) { -#ifdef CONFIG_MMU - arm_vmregion_create_proc("dma-mappings", &consistent_head); -#endif dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } @@ -1117,61 +984,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s * Create a CPU mapping for a specified pages */ static void * -__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) { - struct arm_vmregion *c; - size_t align; - size_t count = size >> PAGE_SHIFT; - int bit; + unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area; + unsigned long p; - if 
(!consistent_pte[0]) { - pr_err("%s: not initialised\n", __func__); - dump_stack(); + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) return NULL; - } - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - int i = 0; - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->priv = pages; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(pages[i], prot), 0); - pte++; - off++; - i++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (i < count); - - dsb(); + area->pages = pages; + area->nr_pages = nr_pages; + p = (unsigned long)area->addr; - return (void *)c->vm_start; + for (i = 0; i < nr_pages; i++) { + phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); + if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) + goto err; + p += PAGE_SIZE; } + return area->addr; +err: + unmap_kernel_range((unsigned long)area->addr, size); + vunmap(area->addr); return NULL; } @@ -1230,6 +1068,16 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si return 0; } +static struct page **__iommu_get_pages(void *cpu_addr) +{ + struct vm_struct *area; + + area = find_vm_area(cpu_addr); + if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) + return area->pages; + return NULL; +} + static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { @@ -1248,7 +1096,8 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (*handle == DMA_ERROR_CODE) goto err_buffer; - addr = __iommu_alloc_remap(pages, size, gfp, prot); + addr = __iommu_alloc_remap(pages, size, gfp, prot, + __builtin_return_address(0)); if (!addr) goto err_mapping; @@ -1265,31 +1114,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { - struct arm_vmregion *c; + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + struct page **pages = __iommu_get_pages(cpu_addr); vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - struct page **pages = c->priv; - - unsigned long uaddr = vma->vm_start; - unsigned long usize = vma->vm_end - vma->vm_start; - int i = 0; - - do { - int ret; + if (!pages) + return -ENXIO; - ret = vm_insert_page(vma, uaddr, pages[i++]); - if (ret) { - pr_err("Remapping memory, error: %d\n", ret); - return ret; - } + do { + int ret = vm_insert_page(vma, uaddr, *pages++); + if (ret) { + pr_err("Remapping memory failed: %d\n", ret); + return ret; + } + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); - uaddr += PAGE_SIZE; - usize -= PAGE_SIZE; - } while (usize > 0); - } return 0; } @@ -1300,16 +1143,19 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void 
arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct arm_vmregion *c; + struct page **pages = __iommu_get_pages(cpu_addr); size = PAGE_ALIGN(size); - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - struct page **pages = c->priv; - __dma_free_remap(cpu_addr, size); - __iommu_remove_mapping(dev, handle, size); - __iommu_free_buffer(dev, pages, size); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; } + + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size); } /* diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 2e8a1efdf7b8..6776160618ef 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -59,6 +59,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page #define VM_ARM_MTYPE(mt) ((mt) << 20) #define VM_ARM_MTYPE_MASK (0x1f << 20) +/* consistent regions used by dma_alloc_attrs() */ +#define VM_ARM_DMA_CONSISTENT 0x20000000 + #endif #ifdef CONFIG_ZONE_DMA diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2e28f4d40d7f..6071e911c7f4 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -93,6 +93,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long start, unsigned long end, const void *caller); extern struct vm_struct *remove_vm_area(const void *addr); +extern struct vm_struct *find_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 11308f034f85..65fc4dc71108 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1403,7 +1403,15 @@ struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, -1, GFP_KERNEL, caller); } -static struct vm_struct *find_vm_area(const void *addr) +/** + * find_vm_area - find a continuous kernel virtual area + * @addr: base address + * + * Search for the kernel VM area starting at @addr, and return it. + * It is up to the caller to do all required locking to keep the returned + * pointer valid. + */ +struct vm_struct *find_vm_area(const void *addr) { struct vmap_area *va; -- cgit v1.2.3
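
Taken together, the patches above touch three boot-time parameters documented
in kernel-parameters.txt: coherent_pool= now sizes the ARM atomic DMA pool
(256K by default), the RCU leaf fanout can be raised (but not lowered) at boot,
and usb-storage.quirks gains the 'p' (WRITE_CACHE) flag.  As a rough
illustration only, not taken from the patches themselves, a command line using
all three might look like the following; the 0123:4567 vendor:product ID is a
made-up placeholder, and the fanout value is assumed to be larger than the
kernel's CONFIG_RCU_FANOUT_LEAF:

	coherent_pool=1M rcutree.rcu_fanout_leaf=32 usb-storage.quirks=0123:4567:p

Here coherent_pool=1M enlarges the atomic coherent pool beyond its 256K
default, rcutree.rcu_fanout_leaf=32 lets each leaf rcu_node structure handle
more CPUs than the compile-time value (a smaller value is rejected with a
warning and the compile-time geometry is kept), and the trailing :p marks the
write cache of that (hypothetical) USB disk as enabled by default.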