diff options
Diffstat (limited to 'arch/ia64/mm')
-rw-r--r-- | arch/ia64/mm/contig.c | 99 | ||||
-rw-r--r-- | arch/ia64/mm/discontig.c | 129 | ||||
-rw-r--r-- | arch/ia64/mm/init.c | 6 | ||||
-rw-r--r-- | arch/ia64/mm/ioremap.c | 11 | ||||
-rw-r--r-- | arch/ia64/mm/tlb.c | 56 |
5 files changed, 245 insertions, 56 deletions
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 2f724d2bf299..54bf54059811 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -154,38 +154,99 @@ static void *cpu_data; void * __cpuinit per_cpu_init (void) { - int cpu; - static int first_time=1; + static bool first_time = true; + void *cpu0_data = __cpu0_per_cpu; + unsigned int cpu; + + if (!first_time) + goto skip; + first_time = false; /* - * get_free_pages() cannot be used before cpu_init() done. BSP - * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls - * get_zeroed_page(). + * get_free_pages() cannot be used before cpu_init() done. + * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs + * to avoid that AP calls get_zeroed_page(). */ - if (first_time) { - void *cpu0_data = __cpu0_per_cpu; + for_each_possible_cpu(cpu) { + void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start; - first_time=0; + memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start; + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; - __per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start; - per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0]; + /* + * percpu area for cpu0 is moved from the __init area + * which is setup by head.S and used till this point. + * Update ar.k3. This move is ensures that percpu + * area for cpu0 is on the correct node and its + * virtual address isn't insanely far from other + * percpu areas which is important for congruent + * percpu allocator. + */ + if (cpu == 0) + ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) - + (unsigned long)__per_cpu_start); - for (cpu = 1; cpu < NR_CPUS; cpu++) { - memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; - cpu_data += PERCPU_PAGE_SIZE; - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; - } + cpu_data += PERCPU_PAGE_SIZE; } +skip: return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } static inline void alloc_per_cpu_data(void) { - cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS-1, + cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(), PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); } + +/** + * setup_per_cpu_areas - setup percpu areas + * + * Arch code has already allocated and initialized percpu areas. All + * this function has to do is to teach the determined layout to the + * dynamic percpu allocator, which happens to be more complex than + * creating whole new ones using helpers. + */ +void __init +setup_per_cpu_areas(void) +{ + struct pcpu_alloc_info *ai; + struct pcpu_group_info *gi; + unsigned int cpu; + ssize_t static_size, reserved_size, dyn_size; + int rc; + + ai = pcpu_alloc_alloc_info(1, num_possible_cpus()); + if (!ai) + panic("failed to allocate pcpu_alloc_info"); + gi = &ai->groups[0]; + + /* units are assigned consecutively to possible cpus */ + for_each_possible_cpu(cpu) + gi->cpu_map[gi->nr_units++] = cpu; + + /* set parameters */ + static_size = __per_cpu_end - __per_cpu_start; + reserved_size = PERCPU_MODULE_RESERVE; + dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; + if (dyn_size < 0) + panic("percpu area overflow static=%zd reserved=%zd\n", + static_size, reserved_size); + + ai->static_size = static_size; + ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = PERCPU_PAGE_SIZE; + ai->atom_size = PAGE_SIZE; + ai->alloc_size = PERCPU_PAGE_SIZE; + + rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]); + if (rc) + panic("failed to setup percpu area (err=%d)", rc); + + pcpu_free_alloc_info(ai); +} #else #define alloc_per_cpu_data() do { } while (0) #endif /* CONFIG_SMP */ @@ -270,8 +331,8 @@ paging_init (void) map_size = PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page)); - vmalloc_end -= map_size; - vmem_map = (struct page *) vmalloc_end; + VMALLOC_END -= map_size; + vmem_map = (struct page *) VMALLOC_END; efi_memmap_walk(create_mem_map_page_table, NULL); /* diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index d85ba98d9008..19c4b2195dce 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -143,22 +143,120 @@ static void *per_cpu_node_setup(void *cpu_data, int node) int cpu; for_each_possible_early_cpu(cpu) { - if (cpu == 0) { - void *cpu0_data = __cpu0_per_cpu; - __per_cpu_offset[cpu] = (char*)cpu0_data - - __per_cpu_start; - } else if (node == node_cpuid[cpu].nid) { - memcpy(__va(cpu_data), __phys_per_cpu_start, - __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char*)__va(cpu_data) - - __per_cpu_start; - cpu_data += PERCPU_PAGE_SIZE; - } + void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start; + + if (node != node_cpuid[cpu].nid) + continue; + + memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *)__va(cpu_data) - + __per_cpu_start; + + /* + * percpu area for cpu0 is moved from the __init area + * which is setup by head.S and used till this point. + * Update ar.k3. This move is ensures that percpu + * area for cpu0 is on the correct node and its + * virtual address isn't insanely far from other + * percpu areas which is important for congruent + * percpu allocator. + */ + if (cpu == 0) + ia64_set_kr(IA64_KR_PER_CPU_DATA, + (unsigned long)cpu_data - + (unsigned long)__per_cpu_start); + + cpu_data += PERCPU_PAGE_SIZE; } #endif return cpu_data; } +#ifdef CONFIG_SMP +/** + * setup_per_cpu_areas - setup percpu areas + * + * Arch code has already allocated and initialized percpu areas. All + * this function has to do is to teach the determined layout to the + * dynamic percpu allocator, which happens to be more complex than + * creating whole new ones using helpers. + */ +void __init setup_per_cpu_areas(void) +{ + struct pcpu_alloc_info *ai; + struct pcpu_group_info *uninitialized_var(gi); + unsigned int *cpu_map; + void *base; + unsigned long base_offset; + unsigned int cpu; + ssize_t static_size, reserved_size, dyn_size; + int node, prev_node, unit, nr_units, rc; + + ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids); + if (!ai) + panic("failed to allocate pcpu_alloc_info"); + cpu_map = ai->groups[0].cpu_map; + + /* determine base */ + base = (void *)ULONG_MAX; + for_each_possible_cpu(cpu) + base = min(base, + (void *)(__per_cpu_offset[cpu] + __per_cpu_start)); + base_offset = (void *)__per_cpu_start - base; + + /* build cpu_map, units are grouped by node */ + unit = 0; + for_each_node(node) + for_each_possible_cpu(cpu) + if (node == node_cpuid[cpu].nid) + cpu_map[unit++] = cpu; + nr_units = unit; + + /* set basic parameters */ + static_size = __per_cpu_end - __per_cpu_start; + reserved_size = PERCPU_MODULE_RESERVE; + dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; + if (dyn_size < 0) + panic("percpu area overflow static=%zd reserved=%zd\n", + static_size, reserved_size); + + ai->static_size = static_size; + ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = PERCPU_PAGE_SIZE; + ai->atom_size = PAGE_SIZE; + ai->alloc_size = PERCPU_PAGE_SIZE; + + /* + * CPUs are put into groups according to node. Walk cpu_map + * and create new groups at node boundaries. + */ + prev_node = -1; + ai->nr_groups = 0; + for (unit = 0; unit < nr_units; unit++) { + cpu = cpu_map[unit]; + node = node_cpuid[cpu].nid; + + if (node == prev_node) { + gi->nr_units++; + continue; + } + prev_node = node; + + gi = &ai->groups[ai->nr_groups++]; + gi->nr_units = 1; + gi->base_offset = __per_cpu_offset[cpu] + base_offset; + gi->cpu_map = &cpu_map[unit]; + } + + rc = pcpu_setup_first_chunk(ai, base); + if (rc) + panic("failed to setup percpu area (err=%d)", rc); + + pcpu_free_alloc_info(ai); +} +#endif + /** * fill_pernode - initialize pernode data. * @node: the node id. @@ -352,7 +450,8 @@ static void __init initialize_pernode_data(void) /* Set the node_data pointer for each per-cpu struct */ for_each_possible_early_cpu(cpu) { node = node_cpuid[cpu].nid; - per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; + per_cpu(ia64_cpu_info, cpu).node_data = + mem_data[node].node_data; } #else { @@ -360,7 +459,7 @@ static void __init initialize_pernode_data(void) cpu = 0; node = node_cpuid[cpu].nid; cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + - ((char *)&per_cpu__cpu_info - __per_cpu_start)); + ((char *)&per_cpu__ia64_cpu_info - __per_cpu_start)); cpu0_cpu_info->node_data = mem_data[node].node_data; } #endif /* CONFIG_SMP */ @@ -666,9 +765,9 @@ void __init paging_init(void) sparse_init(); #ifdef CONFIG_VIRTUAL_MEM_MAP - vmalloc_end -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * + VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page)); - vmem_map = (struct page *) vmalloc_end; + vmem_map = (struct page *) VMALLOC_END; efi_memmap_walk(create_mem_map_page_table, NULL); printk("Virtual mem_map starts at 0x%p\n", vmem_map); #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 1857766a63c1..7c0d4814a68d 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -44,8 +44,8 @@ extern void ia64_tlb_init (void); unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; #ifdef CONFIG_VIRTUAL_MEM_MAP -unsigned long vmalloc_end = VMALLOC_END_INIT; -EXPORT_SYMBOL(vmalloc_end); +unsigned long VMALLOC_END = VMALLOC_END_INIT; +EXPORT_SYMBOL(VMALLOC_END); struct page *vmem_map; EXPORT_SYMBOL(vmem_map); #endif @@ -91,7 +91,7 @@ dma_mark_clean(void *addr, size_t size) inline void ia64_set_rbs_bot (void) { - unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16; + unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16; if (stack_size > MAX_USER_STACK_SIZE) stack_size = MAX_USER_STACK_SIZE; diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 2a140627dfd6..3dccdd8eb275 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -22,6 +22,12 @@ __ioremap (unsigned long phys_addr) } void __iomem * +early_ioremap (unsigned long phys_addr, unsigned long size) +{ + return __ioremap(phys_addr); +} + +void __iomem * ioremap (unsigned long phys_addr, unsigned long size) { void __iomem *addr; @@ -102,6 +108,11 @@ ioremap_nocache (unsigned long phys_addr, unsigned long size) EXPORT_SYMBOL(ioremap_nocache); void +early_iounmap (volatile void __iomem *addr, unsigned long size) +{ +} + +void iounmap (volatile void __iomem *addr) { if (REGION_NUMBER(addr) == RGN_GATE) diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index f426dc78d959..f3de9d7a98b4 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -48,7 +48,7 @@ DEFINE_PER_CPU(u8, ia64_need_tlb_flush); DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ -struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; +struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; /* * Initializes the ia64_ctx.bitmap array based on max_ctx+1. @@ -100,24 +100,36 @@ wrap_mmu_context (struct mm_struct *mm) * this primitive it can be moved up to a spinaphore.h header. */ struct spinaphore { - atomic_t cur; + unsigned long ticket; + unsigned long serve; }; static inline void spinaphore_init(struct spinaphore *ss, int val) { - atomic_set(&ss->cur, val); + ss->ticket = 0; + ss->serve = val; } static inline void down_spin(struct spinaphore *ss) { - while (unlikely(!atomic_add_unless(&ss->cur, -1, 0))) - while (atomic_read(&ss->cur) == 0) - cpu_relax(); + unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve; + + if (time_before(t, ss->serve)) + return; + + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory"); + if (time_before(t, serve)) + return; + cpu_relax(); + } } static inline void up_spin(struct spinaphore *ss) { - atomic_add(1, &ss->cur); + ia64_fetchadd(1, &ss->serve, rel); } static struct spinaphore ptcg_sem; @@ -417,10 +429,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) struct ia64_tr_entry *p; int cpu = smp_processor_id(); + if (!ia64_idtrs[cpu]) { + ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX * + sizeof (struct ia64_tr_entry), GFP_KERNEL); + if (!ia64_idtrs[cpu]) + return -ENOMEM; + } r = -EINVAL; /*Check overlap with existing TR entries*/ if (target_mask & 0x1) { - p = &__per_cpu_idtrs[cpu][0][0]; + p = ia64_idtrs[cpu]; for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); i++, p++) { if (p->pte & 0x1) @@ -432,7 +450,7 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) } } if (target_mask & 0x2) { - p = &__per_cpu_idtrs[cpu][1][0]; + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); i++, p++) { if (p->pte & 0x1) @@ -447,16 +465,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { switch (target_mask & 0x3) { case 1: - if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1)) + if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) goto found; continue; case 2: - if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) goto found; continue; case 3: - if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) && - !(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + if (!((ia64_idtrs[cpu] + i)->pte & 0x1) && + !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) goto found; continue; default: @@ -476,7 +494,7 @@ found: if (target_mask & 0x1) { ia64_itr(0x1, i, va, pte, log_size); ia64_srlz_i(); - p = &__per_cpu_idtrs[cpu][0][i]; + p = ia64_idtrs[cpu] + i; p->ifa = va; p->pte = pte; p->itir = log_size << 2; @@ -485,7 +503,7 @@ found: if (target_mask & 0x2) { ia64_itr(0x2, i, va, pte, log_size); ia64_srlz_i(); - p = &__per_cpu_idtrs[cpu][1][i]; + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; p->ifa = va; p->pte = pte; p->itir = log_size << 2; @@ -516,7 +534,7 @@ void ia64_ptr_entry(u64 target_mask, int slot) return; if (target_mask & 0x1) { - p = &__per_cpu_idtrs[cpu][0][slot]; + p = ia64_idtrs[cpu] + slot; if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { p->pte = 0; ia64_ptr(0x1, p->ifa, p->itir>>2); @@ -525,7 +543,7 @@ void ia64_ptr_entry(u64 target_mask, int slot) } if (target_mask & 0x2) { - p = &__per_cpu_idtrs[cpu][1][slot]; + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { p->pte = 0; ia64_ptr(0x2, p->ifa, p->itir>>2); @@ -534,8 +552,8 @@ void ia64_ptr_entry(u64 target_mask, int slot) } for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { - if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) || - (__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + if (((ia64_idtrs[cpu] + i)->pte & 0x1) || + ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) break; } per_cpu(ia64_tr_used, cpu) = i; |