diff options
Diffstat (limited to 'include/asm-powerpc')
-rw-r--r-- | include/asm-powerpc/cputable.h | 22 | ||||
-rw-r--r-- | include/asm-powerpc/delay.h | 40 | ||||
-rw-r--r-- | include/asm-powerpc/dma-mapping.h | 285 | ||||
-rw-r--r-- | include/asm-powerpc/eeh.h | 4 | ||||
-rw-r--r-- | include/asm-powerpc/io.h | 462 | ||||
-rw-r--r-- | include/asm-powerpc/mmu.h | 399 | ||||
-rw-r--r-- | include/asm-powerpc/mmu_context.h | 89 | ||||
-rw-r--r-- | include/asm-powerpc/mmzone.h | 50 | ||||
-rw-r--r-- | include/asm-powerpc/page_64.h | 4 | ||||
-rw-r--r-- | include/asm-powerpc/pci-bridge.h | 153 | ||||
-rw-r--r-- | include/asm-powerpc/pci.h | 247 | ||||
-rw-r--r-- | include/asm-powerpc/pgalloc.h | 156 | ||||
-rw-r--r-- | include/asm-powerpc/pgtable-4k.h | 91 | ||||
-rw-r--r-- | include/asm-powerpc/pgtable-64k.h | 90 | ||||
-rw-r--r-- | include/asm-powerpc/pgtable.h | 524 | ||||
-rw-r--r-- | include/asm-powerpc/ppc-pci.h | 2 | ||||
-rw-r--r-- | include/asm-powerpc/spinlock.h | 269 | ||||
-rw-r--r-- | include/asm-powerpc/topology.h | 4 |
18 files changed, 2840 insertions, 51 deletions
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 04e2726002cf..d1cfa3f515ea 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -90,6 +90,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000) #define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000) #define CPU_FTR_BIG_PHYS ASM_CONST(0x0000000000080000) +#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000) #ifdef __powerpc64__ /* Add the 64b processor unique features in the top half of the word */ @@ -97,7 +98,6 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTR_16M_PAGE ASM_CONST(0x0000000200000000) #define CPU_FTR_TLBIEL ASM_CONST(0x0000000400000000) #define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000800000000) -#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000001000000000) #define CPU_FTR_IABR ASM_CONST(0x0000002000000000) #define CPU_FTR_MMCRA ASM_CONST(0x0000004000000000) #define CPU_FTR_CTRL ASM_CONST(0x0000008000000000) @@ -113,7 +113,6 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTR_16M_PAGE ASM_CONST(0x0) #define CPU_FTR_TLBIEL ASM_CONST(0x0) #define CPU_FTR_NOEXECUTE ASM_CONST(0x0) -#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0) #define CPU_FTR_IABR ASM_CONST(0x0) #define CPU_FTR_MMCRA ASM_CONST(0x0) #define CPU_FTR_CTRL ASM_CONST(0x0) @@ -273,18 +272,21 @@ enum { CPU_FTRS_POWER3_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, CPU_FTRS_POWER4_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | - CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_NODSISRALIGN, CPU_FTRS_970_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_MAYBE_CAN_NAP, + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN, CPU_FTRS_8XX = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - CPU_FTRS_40X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - CPU_FTRS_44X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - CPU_FTRS_E200 = CPU_FTR_USE_TB, - CPU_FTRS_E500 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, + CPU_FTRS_40X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | + CPU_FTR_NODSISRALIGN, + CPU_FTRS_44X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | + CPU_FTR_NODSISRALIGN, + CPU_FTRS_E200 = CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN, + CPU_FTRS_E500 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | + CPU_FTR_NODSISRALIGN, CPU_FTRS_E500_2 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | - CPU_FTR_BIG_PHYS, - CPU_FTRS_GENERIC_32 = CPU_FTR_COMMON, + CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN, + CPU_FTRS_GENERIC_32 = CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN, #ifdef __powerpc64__ CPU_FTRS_POWER3 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR, diff --git a/include/asm-powerpc/delay.h b/include/asm-powerpc/delay.h index 1492aa9ab716..54fe1f4f8fd0 100644 --- a/include/asm-powerpc/delay.h +++ b/include/asm-powerpc/delay.h @@ -13,43 +13,7 @@ * Anton Blanchard. */ -extern unsigned long tb_ticks_per_usec; - -#ifdef CONFIG_PPC64 -/* define these here to prevent circular dependencies */ -/* these instructions control the thread priority on multi-threaded cpus */ -#define __HMT_low() asm volatile("or 1,1,1") -#define __HMT_medium() asm volatile("or 2,2,2") -#else -#define __HMT_low() -#define __HMT_medium() -#endif - -#define __barrier() asm volatile("" ::: "memory") - -static inline unsigned long __get_tb(void) -{ - unsigned long rval; - - asm volatile("mftb %0" : "=r" (rval)); - return rval; -} - -static inline void __delay(unsigned long loops) -{ - unsigned long start = __get_tb(); - - while((__get_tb() - start) < loops) - __HMT_low(); - __HMT_medium(); - __barrier(); -} - -static inline void udelay(unsigned long usecs) -{ - unsigned long loops = tb_ticks_per_usec * usecs; - - __delay(loops); -} +extern void __delay(unsigned long loops); +extern void udelay(unsigned long usecs); #endif /* _ASM_POWERPC_DELAY_H */ diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h new file mode 100644 index 000000000000..59a80163f75f --- /dev/null +++ b/include/asm-powerpc/dma-mapping.h @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2004 IBM + * + * Implements the generic device dma API for powerpc. + * the pci and vio busses + */ +#ifndef _ASM_DMA_MAPPING_H +#define _ASM_DMA_MAPPING_H + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/cache.h> +/* need struct page definitions */ +#include <linux/mm.h> +#include <asm/scatterlist.h> +#include <asm/io.h> +#include <asm/bug.h> + +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) + +#ifdef CONFIG_NOT_COHERENT_CACHE +/* + * DMA-consistent mapping functions for PowerPCs that don't support + * cache snooping. These allocate/free a region of uncached mapped + * memory space for use with DMA devices. Alternatively, you could + * allocate the space "normally" and use the cache management functions + * to ensure it is consistent. + */ +extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp); +extern void __dma_free_coherent(size_t size, void *vaddr); +extern void __dma_sync(void *vaddr, size_t size, int direction); +extern void __dma_sync_page(struct page *page, unsigned long offset, + size_t size, int direction); + +#else /* ! CONFIG_NOT_COHERENT_CACHE */ +/* + * Cache coherent cores. + */ + +#define __dma_alloc_coherent(gfp, size, handle) NULL +#define __dma_free_coherent(size, addr) do { } while (0) +#define __dma_sync(addr, size, rw) do { } while (0) +#define __dma_sync_page(pg, off, sz, rw) do { } while (0) + +#endif /* ! CONFIG_NOT_COHERENT_CACHE */ + +#ifdef CONFIG_PPC64 + +extern int dma_supported(struct device *dev, u64 mask); +extern int dma_set_mask(struct device *dev, u64 dma_mask); +extern void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); +extern void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle); +extern dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction); +extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction); +extern dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction); +extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction); +extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction); +extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nhwentries, enum dma_data_direction direction); + +#else /* CONFIG_PPC64 */ + +#define dma_supported(dev, mask) (1) + +static inline int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + + return 0; +} + +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, + gfp_t gfp) +{ +#ifdef CONFIG_NOT_COHERENT_CACHE + return __dma_alloc_coherent(size, dma_handle, gfp); +#else + void *ret; + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dev == NULL || dev->coherent_dma_mask < 0xffffffff) + gfp |= GFP_DMA; + + ret = (void *)__get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_bus(ret); + } + + return ret; +#endif +} + +static inline void +dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ +#ifdef CONFIG_NOT_COHERENT_CACHE + __dma_free_coherent(size, vaddr); +#else + free_pages((unsigned long)vaddr, get_order(size)); +#endif +} + +static inline dma_addr_t +dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + __dma_sync(ptr, size, direction); + + return virt_to_bus(ptr); +} + +/* We do nothing. */ +#define dma_unmap_single(dev, addr, size, dir) do { } while (0) + +static inline dma_addr_t +dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + __dma_sync_page(page, offset, size, direction); + + return page_to_bus(page) + offset; +} + +/* We do nothing. */ +#define dma_unmap_page(dev, handle, size, dir) do { } while (0) + +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) { + BUG_ON(!sg->page); + __dma_sync_page(sg->page, sg->offset, sg->length, direction); + sg->dma_address = page_to_bus(sg->page) + sg->offset; + } + + return nents; +} + +/* We don't do anything here. */ +#define dma_unmap_sg(dev, sg, nents, dir) do { } while (0) + +#endif /* CONFIG_PPC64 */ + +static inline void dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + __dma_sync(bus_to_virt(dma_handle), size, direction); +} + +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + __dma_sync(bus_to_virt(dma_handle), size, direction); +} + +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) + __dma_sync_page(sg->page, sg->offset, sg->length, direction); +} + +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) + __dma_sync_page(sg->page, sg->offset, sg->length, direction); +} + +static inline int dma_mapping_error(dma_addr_t dma_addr) +{ +#ifdef CONFIG_PPC64 + return (dma_addr == DMA_ERROR_CODE); +#else + return 0; +#endif +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) +#ifdef CONFIG_NOT_COHERENT_CACHE +#define dma_is_consistent(d) (0) +#else +#define dma_is_consistent(d) (1) +#endif + +static inline int dma_get_cache_alignment(void) +{ +#ifdef CONFIG_PPC64 + /* no easy way to get cache size on all processors, so return + * the maximum possible, to be safe */ + return (1 << L1_CACHE_SHIFT_MAX); +#else + /* + * Each processor family will define its own L1_CACHE_SHIFT, + * L1_CACHE_BYTES wraps to this, so this is always safe. + */ + return L1_CACHE_BYTES; +#endif +} + +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + /* just sync everything for now */ + dma_sync_single_for_cpu(dev, dma_handle, offset + size, direction); +} + +static inline void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + /* just sync everything for now */ + dma_sync_single_for_device(dev, dma_handle, offset + size, direction); +} + +static inline void dma_cache_sync(void *vaddr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + __dma_sync(vaddr, size, (int)direction); +} + +/* + * DMA operations are abstracted for G5 vs. i/pSeries, PCI vs. VIO + */ +struct dma_mapping_ops { + void * (*alloc_coherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + void (*free_coherent)(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + dma_addr_t (*map_single)(struct device *dev, void *ptr, + size_t size, enum dma_data_direction direction); + void (*unmap_single)(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction); + int (*map_sg)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction); + void (*unmap_sg)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction); + int (*dma_supported)(struct device *dev, u64 mask); + int (*dac_dma_supported)(struct device *dev, u64 mask); +}; + +#endif /* _ASM_DMA_MAPPING_H */ diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index 89f26ab31908..f8633aafe4ba 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -30,6 +30,8 @@ struct device_node; #ifdef CONFIG_EEH +extern int eeh_subsystem_enabled; + /* Values for eeh_mode bits in device_node */ #define EEH_MODE_SUPPORTED (1<<0) #define EEH_MODE_NOCHECK (1<<1) @@ -75,7 +77,7 @@ void eeh_remove_device(struct pci_dev *); * If this macro yields TRUE, the caller relays to eeh_check_failure() * which does further tests out of line. */ -#define EEH_POSSIBLE_ERROR(val, type) ((val) == (type)~0) +#define EEH_POSSIBLE_ERROR(val, type) ((val) == (type)~0 && eeh_subsystem_enabled) /* * Reads from a device which has been isolated by EEH will return diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h new file mode 100644 index 000000000000..48938d84d055 --- /dev/null +++ b/include/asm-powerpc/io.h @@ -0,0 +1,462 @@ +#ifndef _ASM_POWERPC_IO_H +#define _ASM_POWERPC_IO_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef CONFIG_PPC64 +#include <asm-ppc/io.h> +#else + +#include <linux/compiler.h> +#include <asm/page.h> +#include <asm/byteorder.h> +#ifdef CONFIG_PPC_ISERIES +#include <asm/iseries/iseries_io.h> +#endif +#include <asm/synch.h> +#include <asm/delay.h> + +#include <asm-generic/iomap.h> + +#define __ide_mm_insw(p, a, c) _insw_ns((volatile u16 __iomem *)(p), (a), (c)) +#define __ide_mm_insl(p, a, c) _insl_ns((volatile u32 __iomem *)(p), (a), (c)) +#define __ide_mm_outsw(p, a, c) _outsw_ns((volatile u16 __iomem *)(p), (a), (c)) +#define __ide_mm_outsl(p, a, c) _outsl_ns((volatile u32 __iomem *)(p), (a), (c)) + + +#define SIO_CONFIG_RA 0x398 +#define SIO_CONFIG_RD 0x399 + +#define SLOW_DOWN_IO + +extern unsigned long isa_io_base; +extern unsigned long pci_io_base; +extern unsigned long io_page_mask; + +#define MAX_ISA_PORT 0x10000 + +#define _IO_IS_VALID(port) ((port) >= MAX_ISA_PORT || (1 << (port>>PAGE_SHIFT)) \ + & io_page_mask) + +#ifdef CONFIG_PPC_ISERIES +/* __raw_* accessors aren't supported on iSeries */ +#define __raw_readb(addr) { BUG(); 0; } +#define __raw_readw(addr) { BUG(); 0; } +#define __raw_readl(addr) { BUG(); 0; } +#define __raw_readq(addr) { BUG(); 0; } +#define __raw_writeb(v, addr) { BUG(); 0; } +#define __raw_writew(v, addr) { BUG(); 0; } +#define __raw_writel(v, addr) { BUG(); 0; } +#define __raw_writeq(v, addr) { BUG(); 0; } +#define readb(addr) iSeries_Read_Byte(addr) +#define readw(addr) iSeries_Read_Word(addr) +#define readl(addr) iSeries_Read_Long(addr) +#define writeb(data, addr) iSeries_Write_Byte((data),(addr)) +#define writew(data, addr) iSeries_Write_Word((data),(addr)) +#define writel(data, addr) iSeries_Write_Long((data),(addr)) +#define memset_io(a,b,c) iSeries_memset_io((a),(b),(c)) +#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c)) +#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c)) + +#define inb(addr) readb(((void __iomem *)(long)(addr))) +#define inw(addr) readw(((void __iomem *)(long)(addr))) +#define inl(addr) readl(((void __iomem *)(long)(addr))) +#define outb(data,addr) writeb(data,((void __iomem *)(long)(addr))) +#define outw(data,addr) writew(data,((void __iomem *)(long)(addr))) +#define outl(data,addr) writel(data,((void __iomem *)(long)(addr))) +/* + * The *_ns versions below don't do byte-swapping. + * Neither do the standard versions now, these are just here + * for older code. + */ +#define insw_ns(port, buf, ns) _insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) +#define insl_ns(port, buf, nl) _insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) +#else + +static inline unsigned char __raw_readb(const volatile void __iomem *addr) +{ + return *(volatile unsigned char __force *)addr; +} +static inline unsigned short __raw_readw(const volatile void __iomem *addr) +{ + return *(volatile unsigned short __force *)addr; +} +static inline unsigned int __raw_readl(const volatile void __iomem *addr) +{ + return *(volatile unsigned int __force *)addr; +} +static inline unsigned long __raw_readq(const volatile void __iomem *addr) +{ + return *(volatile unsigned long __force *)addr; +} +static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *)addr = v; +} +static inline void __raw_writew(unsigned short v, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *)addr = v; +} +static inline void __raw_writel(unsigned int v, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *)addr = v; +} +static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) +{ + *(volatile unsigned long __force *)addr = v; +} +#define readb(addr) eeh_readb(addr) +#define readw(addr) eeh_readw(addr) +#define readl(addr) eeh_readl(addr) +#define readq(addr) eeh_readq(addr) +#define writeb(data, addr) eeh_writeb((data), (addr)) +#define writew(data, addr) eeh_writew((data), (addr)) +#define writel(data, addr) eeh_writel((data), (addr)) +#define writeq(data, addr) eeh_writeq((data), (addr)) +#define memset_io(a,b,c) eeh_memset_io((a),(b),(c)) +#define memcpy_fromio(a,b,c) eeh_memcpy_fromio((a),(b),(c)) +#define memcpy_toio(a,b,c) eeh_memcpy_toio((a),(b),(c)) +#define inb(port) eeh_inb((unsigned long)port) +#define outb(val, port) eeh_outb(val, (unsigned long)port) +#define inw(port) eeh_inw((unsigned long)port) +#define outw(val, port) eeh_outw(val, (unsigned long)port) +#define inl(port) eeh_inl((unsigned long)port) +#define outl(val, port) eeh_outl(val, (unsigned long)port) + +/* + * The insw/outsw/insl/outsl macros don't do byte-swapping. + * They are only used in practice for transferring buffers which + * are arrays of bytes, and byte-swapping is not appropriate in + * that case. - paulus */ +#define insb(port, buf, ns) eeh_insb((port), (buf), (ns)) +#define insw(port, buf, ns) eeh_insw_ns((port), (buf), (ns)) +#define insl(port, buf, nl) eeh_insl_ns((port), (buf), (nl)) +#define insw_ns(port, buf, ns) eeh_insw_ns((port), (buf), (ns)) +#define insl_ns(port, buf, nl) eeh_insl_ns((port), (buf), (nl)) + +#define outsb(port, buf, ns) _outsb((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) +#define outsw(port, buf, ns) _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) +#define outsl(port, buf, nl) _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) + +#endif + +#define readb_relaxed(addr) readb(addr) +#define readw_relaxed(addr) readw(addr) +#define readl_relaxed(addr) readl(addr) +#define readq_relaxed(addr) readq(addr) + +extern void _insb(volatile u8 __iomem *port, void *buf, int ns); +extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns); +extern void _insw(volatile u16 __iomem *port, void *buf, int ns); +extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns); +extern void _insl(volatile u32 __iomem *port, void *buf, int nl); +extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl); +extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns); +extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); +extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); +extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); + +#define mmiowb() + +/* + * output pause versions need a delay at least for the + * w83c105 ide controller in a p610. + */ +#define inb_p(port) inb(port) +#define outb_p(val, port) (udelay(1), outb((val), (port))) +#define inw_p(port) inw(port) +#define outw_p(val, port) (udelay(1), outw((val), (port))) +#define inl_p(port) inl(port) +#define outl_p(val, port) (udelay(1), outl((val), (port))) + +/* + * The *_ns versions below don't do byte-swapping. + * Neither do the standard versions now, these are just here + * for older code. + */ +#define outsw_ns(port, buf, ns) _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) +#define outsl_ns(port, buf, nl) _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) + + +#define IO_SPACE_LIMIT ~(0UL) + + +#ifdef __KERNEL__ +extern int __ioremap_explicit(unsigned long p_addr, unsigned long v_addr, + unsigned long size, unsigned long flags); +extern void __iomem *__ioremap(unsigned long address, unsigned long size, + unsigned long flags); + +/** + * ioremap - map bus memory into CPU space + * @address: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ +extern void __iomem *ioremap(unsigned long address, unsigned long size); + +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +extern int iounmap_explicit(volatile void __iomem *addr, unsigned long size); +extern void iounmap(volatile void __iomem *addr); +extern void __iomem * reserve_phb_iospace(unsigned long size); + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa((unsigned long)address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ +static inline void * phys_to_virt(unsigned long address) +{ + return (void *)__va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) + +/* We do NOT want virtual merging, it would put too much pressure on + * our iommu allocator. Instead, we want drivers to be smart enough + * to coalesce sglists that happen to have been mapped in a contiguous + * way by the iommu + */ +#define BIO_VMERGE_BOUNDARY 0 + +#endif /* __KERNEL__ */ + +static inline void iosync(void) +{ + __asm__ __volatile__ ("sync" : : : "memory"); +} + +/* Enforce in-order execution of data I/O. + * No distinction between read/write on PPC; use eieio for all three. + */ +#define iobarrier_rw() eieio() +#define iobarrier_r() eieio() +#define iobarrier_w() eieio() + +/* + * 8, 16 and 32 bit, big and little endian I/O operations, with barrier. + * These routines do not perform EEH-related I/O address translation, + * and should not be used directly by device drivers. Use inb/readb + * instead. + */ +static inline int in_8(const volatile unsigned char __iomem *addr) +{ + int ret; + + __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync" + : "=r" (ret) : "m" (*addr)); + return ret; +} + +static inline void out_8(volatile unsigned char __iomem *addr, int val) +{ + __asm__ __volatile__("stb%U0%X0 %1,%0; sync" + : "=m" (*addr) : "r" (val)); +} + +static inline int in_le16(const volatile unsigned short __iomem *addr) +{ + int ret; + + __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync" + : "=r" (ret) : "r" (addr), "m" (*addr)); + return ret; +} + +static inline int in_be16(const volatile unsigned short __iomem *addr) +{ + int ret; + + __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync" + : "=r" (ret) : "m" (*addr)); + return ret; +} + +static inline void out_le16(volatile unsigned short __iomem *addr, int val) +{ + __asm__ __volatile__("sthbrx %1,0,%2; sync" + : "=m" (*addr) : "r" (val), "r" (addr)); +} + +static inline void out_be16(volatile unsigned short __iomem *addr, int val) +{ + __asm__ __volatile__("sth%U0%X0 %1,%0; sync" + : "=m" (*addr) : "r" (val)); +} + +static inline unsigned in_le32(const volatile unsigned __iomem *addr) +{ + unsigned ret; + + __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync" + : "=r" (ret) : "r" (addr), "m" (*addr)); + return ret; +} + +static inline unsigned in_be32(const volatile unsigned __iomem *addr) +{ + unsigned ret; + + __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync" + : "=r" (ret) : "m" (*addr)); + return ret; +} + +static inline void out_le32(volatile unsigned __iomem *addr, int val) +{ + __asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr) + : "r" (val), "r" (addr)); +} + +static inline void out_be32(volatile unsigned __iomem *addr, int val) +{ + __asm__ __volatile__("stw%U0%X0 %1,%0; sync" + : "=m" (*addr) : "r" (val)); +} + +static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) +{ + unsigned long tmp, ret; + + __asm__ __volatile__( + "ld %1,0(%2)\n" + "twi 0,%1,0\n" + "isync\n" + "rldimi %0,%1,5*8,1*8\n" + "rldimi %0,%1,3*8,2*8\n" + "rldimi %0,%1,1*8,3*8\n" + "rldimi %0,%1,7*8,4*8\n" + "rldicl %1,%1,32,0\n" + "rlwimi %0,%1,8,8,31\n" + "rlwimi %0,%1,24,16,23\n" + : "=r" (ret) , "=r" (tmp) : "b" (addr) , "m" (*addr)); + return ret; +} + +static inline unsigned long in_be64(const volatile unsigned long __iomem *addr) +{ + unsigned long ret; + + __asm__ __volatile__("ld%U1%X1 %0,%1; twi 0,%0,0; isync" + : "=r" (ret) : "m" (*addr)); + return ret; +} + +static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long val) +{ + unsigned long tmp; + + __asm__ __volatile__( + "rldimi %0,%1,5*8,1*8\n" + "rldimi %0,%1,3*8,2*8\n" + "rldimi %0,%1,1*8,3*8\n" + "rldimi %0,%1,7*8,4*8\n" + "rldicl %1,%1,32,0\n" + "rlwimi %0,%1,8,8,31\n" + "rlwimi %0,%1,24,16,23\n" + "std %0,0(%3)\n" + "sync" + : "=&r" (tmp) , "=&r" (val) : "1" (val) , "b" (addr) , "m" (*addr)); +} + +static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val) +{ + __asm__ __volatile__("std%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); +} + +#ifndef CONFIG_PPC_ISERIES +#include <asm/eeh.h> +#endif + +#ifdef __KERNEL__ + +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ +static inline int check_signature(const volatile void __iomem * io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; +#ifndef CONFIG_PPC_ISERIES + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: +#endif + return retval; +} + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) + +/* Check of existence of legacy devices */ +extern int check_legacy_ioport(unsigned long base_port); + + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* CONFIG_PPC64 */ +#endif /* _ASM_POWERPC_IO_H */ diff --git a/include/asm-powerpc/mmu.h b/include/asm-powerpc/mmu.h new file mode 100644 index 000000000000..c1b4bbabbe97 --- /dev/null +++ b/include/asm-powerpc/mmu.h @@ -0,0 +1,399 @@ +#ifndef _ASM_POWERPC_MMU_H_ +#define _ASM_POWERPC_MMU_H_ + +#ifndef CONFIG_PPC64 +#include <asm-ppc/mmu.h> +#else + +/* + * PowerPC memory management structures + * + * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com> + * PPC64 rework. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/asm-compat.h> +#include <asm/page.h> + +/* + * Segment table + */ + +#define STE_ESID_V 0x80 +#define STE_ESID_KS 0x20 +#define STE_ESID_KP 0x10 +#define STE_ESID_N 0x08 + +#define STE_VSID_SHIFT 12 + +/* Location of cpu0's segment table */ +#define STAB0_PAGE 0x6 +#define STAB0_PHYS_ADDR (STAB0_PAGE<<12) + +#ifndef __ASSEMBLY__ +extern char initial_stab[]; +#endif /* ! __ASSEMBLY */ + +/* + * SLB + */ + +#define SLB_NUM_BOLTED 3 +#define SLB_CACHE_ENTRIES 8 + +/* Bits in the SLB ESID word */ +#define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */ + +/* Bits in the SLB VSID word */ +#define SLB_VSID_SHIFT 12 +#define SLB_VSID_B ASM_CONST(0xc000000000000000) +#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) +#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) +#define SLB_VSID_KS ASM_CONST(0x0000000000000800) +#define SLB_VSID_KP ASM_CONST(0x0000000000000400) +#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ +#define SLB_VSID_L ASM_CONST(0x0000000000000100) +#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ +#define SLB_VSID_LP ASM_CONST(0x0000000000000030) +#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000) +#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010) +#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020) +#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030) +#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP) + +#define SLB_VSID_KERNEL (SLB_VSID_KP) +#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) + +#define SLBIE_C (0x08000000) + +/* + * Hash table + */ + +#define HPTES_PER_GROUP 8 + +#define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80) +#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) +#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN)) +#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) +#define HPTE_V_LOCK ASM_CONST(0x0000000000000008) +#define HPTE_V_LARGE ASM_CONST(0x0000000000000004) +#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) +#define HPTE_V_VALID ASM_CONST(0x0000000000000001) + +#define HPTE_R_PP0 ASM_CONST(0x8000000000000000) +#define HPTE_R_TS ASM_CONST(0x4000000000000000) +#define HPTE_R_RPN_SHIFT 12 +#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000) +#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff) +#define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_N ASM_CONST(0x0000000000000004) + +/* Values for PP (assumes Ks=0, Kp=1) */ +/* pp0 will always be 0 for linux */ +#define PP_RWXX 0 /* Supervisor read/write, User none */ +#define PP_RWRX 1 /* Supervisor read/write, User read */ +#define PP_RWRW 2 /* Supervisor read/write, User read/write */ +#define PP_RXRX 3 /* Supervisor read, User read */ + +#ifndef __ASSEMBLY__ + +typedef struct { + unsigned long v; + unsigned long r; +} hpte_t; + +extern hpte_t *htab_address; +extern unsigned long htab_hash_mask; + +/* + * Page size definition + * + * shift : is the "PAGE_SHIFT" value for that page size + * sllp : is a bit mask with the value of SLB L || LP to be or'ed + * directly to a slbmte "vsid" value + * penc : is the HPTE encoding mask for the "LP" field: + * + */ +struct mmu_psize_def +{ + unsigned int shift; /* number of bits */ + unsigned int penc; /* HPTE encoding */ + unsigned int tlbiel; /* tlbiel supported for that page size */ + unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ + unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ +}; + +#endif /* __ASSEMBLY__ */ + +/* + * The kernel use the constants below to index in the page sizes array. + * The use of fixed constants for this purpose is better for performances + * of the low level hash refill handlers. + * + * A non supported page size has a "shift" field set to 0 + * + * Any new page size being implemented can get a new entry in here. Whether + * the kernel will use it or not is a different matter though. The actual page + * size used by hugetlbfs is not defined here and may be made variable + */ + +#define MMU_PAGE_4K 0 /* 4K */ +#define MMU_PAGE_64K 1 /* 64K */ +#define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */ +#define MMU_PAGE_1M 3 /* 1M */ +#define MMU_PAGE_16M 4 /* 16M */ +#define MMU_PAGE_16G 5 /* 16G */ +#define MMU_PAGE_COUNT 6 + +#ifndef __ASSEMBLY__ + +/* + * The current system page sizes + */ +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +extern int mmu_linear_psize; +extern int mmu_virtual_psize; + +#ifdef CONFIG_HUGETLB_PAGE +/* + * The page size index of the huge pages for use by hugetlbfs + */ +extern int mmu_huge_psize; + +#endif /* CONFIG_HUGETLB_PAGE */ + +/* + * This function sets the AVPN and L fields of the HPTE appropriately + * for the page size + */ +static inline unsigned long hpte_encode_v(unsigned long va, int psize) +{ + unsigned long v = + v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); + v <<= HPTE_V_AVPN_SHIFT; + if (psize != MMU_PAGE_4K) + v |= HPTE_V_LARGE; + return v; +} + +/* + * This function sets the ARPN, and LP fields of the HPTE appropriately + * for the page size. We assume the pa is already "clean" that is properly + * aligned for the requested page size + */ +static inline unsigned long hpte_encode_r(unsigned long pa, int psize) +{ + unsigned long r; + + /* A 4K page needs no special encoding */ + if (psize == MMU_PAGE_4K) + return pa & HPTE_R_RPN; + else { + unsigned int penc = mmu_psize_defs[psize].penc; + unsigned int shift = mmu_psize_defs[psize].shift; + return (pa & ~((1ul << shift) - 1)) | (penc << 12); + } + return r; +} + +/* + * This hashes a virtual address for a 256Mb segment only for now + */ + +static inline unsigned long hpt_hash(unsigned long va, unsigned int shift) +{ + return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift); +} + +extern int __hash_page_4K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned int local); +extern int __hash_page_64K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned int local); +struct mm_struct; +extern int hash_huge_page(struct mm_struct *mm, unsigned long access, + unsigned long ea, unsigned long vsid, int local); + +extern void htab_finish_init(void); +extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, + unsigned long pstart, unsigned long mode, + int psize); + +extern void htab_initialize(void); +extern void htab_initialize_secondary(void); +extern void hpte_init_native(void); +extern void hpte_init_lpar(void); +extern void hpte_init_iSeries(void); +extern void mm_init_ppc64(void); + +extern long pSeries_lpar_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long rflags, + unsigned long vflags, int psize); + +extern long native_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long rflags, + unsigned long vflags, int psize); + +extern long iSeries_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long rflags, + unsigned long vflags, int psize); + +extern void stabs_alloc(void); +extern void slb_initialize(void); +extern void stab_initialize(unsigned long stab); + +#endif /* __ASSEMBLY__ */ + +/* + * VSID allocation + * + * We first generate a 36-bit "proto-VSID". For kernel addresses this + * is equal to the ESID, for user addresses it is: + * (context << 15) | (esid & 0x7fff) + * + * The two forms are distinguishable because the top bit is 0 for user + * addresses, whereas the top two bits are 1 for kernel addresses. + * Proto-VSIDs with the top two bits equal to 0b10 are reserved for + * now. + * + * The proto-VSIDs are then scrambled into real VSIDs with the + * multiplicative hash: + * + * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS + * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7 + * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF + * + * This scramble is only well defined for proto-VSIDs below + * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are + * reserved. VSID_MULTIPLIER is prime, so in particular it is + * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. + * Because the modulus is 2^n-1 we can compute it efficiently without + * a divide or extra multiply (see below). + * + * This scheme has several advantages over older methods: + * + * - We have VSIDs allocated for every kernel address + * (i.e. everything above 0xC000000000000000), except the very top + * segment, which simplifies several things. + * + * - We allow for 15 significant bits of ESID and 20 bits of + * context for user addresses. i.e. 8T (43 bits) of address space for + * up to 1M contexts (although the page table structure and context + * allocation will need changes to take advantage of this). + * + * - The scramble function gives robust scattering in the hash + * table (at least based on some initial results). The previous + * method was more susceptible to pathological cases giving excessive + * hash collisions. + */ +/* + * WARNING - If you change these you must make sure the asm + * implementations in slb_allocate (slb_low.S), do_stab_bolted + * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly. + * + * You'll also need to change the precomputed VSID values in head.S + * which are used by the iSeries firmware. + */ + +#define VSID_MULTIPLIER ASM_CONST(200730139) /* 28-bit prime */ +#define VSID_BITS 36 +#define VSID_MODULUS ((1UL<<VSID_BITS)-1) + +#define CONTEXT_BITS 19 +#define USER_ESID_BITS 16 + +#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) + +/* + * This macro generates asm code to compute the VSID scramble + * function. Used in slb_allocate() and do_stab_bolted. The function + * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS + * + * rt = register continaing the proto-VSID and into which the + * VSID will be stored + * rx = scratch register (clobbered) + * + * - rt and rx must be different registers + * - The answer will end up in the low 36 bits of rt. The higher + * bits may contain other garbage, so you may need to mask the + * result. + */ +#define ASM_VSID_SCRAMBLE(rt, rx) \ + lis rx,VSID_MULTIPLIER@h; \ + ori rx,rx,VSID_MULTIPLIER@l; \ + mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ + \ + srdi rx,rt,VSID_BITS; \ + clrldi rt,rt,(64-VSID_BITS); \ + add rt,rt,rx; /* add high and low bits */ \ + /* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ + * 2^36-1+2^28-1. That in particular means that if r3 >= \ + * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ + * the bit clear, r3 already has the answer we want, if it \ + * doesn't, the answer is the low 36 bits of r3+1. So in all \ + * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS; /* extract 2^36 bit */ \ + add rt,rt,rx + + +#ifndef __ASSEMBLY__ + +typedef unsigned long mm_context_id_t; + +typedef struct { + mm_context_id_t id; +#ifdef CONFIG_HUGETLB_PAGE + u16 low_htlb_areas, high_htlb_areas; +#endif +} mm_context_t; + + +static inline unsigned long vsid_scramble(unsigned long protovsid) +{ +#if 0 + /* The code below is equivalent to this function for arguments + * < 2^VSID_BITS, which is all this should ever be called + * with. However gcc is not clever enough to compute the + * modulus (2^n-1) without a second multiply. */ + return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS); +#else /* 1 */ + unsigned long x; + + x = protovsid * VSID_MULTIPLIER; + x = (x >> VSID_BITS) + (x & VSID_MODULUS); + return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS; +#endif /* 1 */ +} + +/* This is only valid for addresses >= KERNELBASE */ +static inline unsigned long get_kernel_vsid(unsigned long ea) +{ + return vsid_scramble(ea >> SID_SHIFT); +} + +/* This is only valid for user addresses (which are below 2^41) */ +static inline unsigned long get_vsid(unsigned long context, unsigned long ea) +{ + return vsid_scramble((context << USER_ESID_BITS) + | (ea >> SID_SHIFT)); +} + +#define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS) +#define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea)) + +#endif /* __ASSEMBLY */ + +#endif /* CONFIG_PPC64 */ +#endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h new file mode 100644 index 000000000000..ea6798c7d5fc --- /dev/null +++ b/include/asm-powerpc/mmu_context.h @@ -0,0 +1,89 @@ +#ifndef __ASM_POWERPC_MMU_CONTEXT_H +#define __ASM_POWERPC_MMU_CONTEXT_H + +#ifndef CONFIG_PPC64 +#include <asm-ppc/mmu_context.h> +#else + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/mmu.h> +#include <asm/cputable.h> + +/* + * Copyright (C) 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Getting into a kernel thread, there is no valid user segment, mark + * paca->pgdir NULL so that SLB miss on user addresses will fault + */ +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) +{ +#ifdef CONFIG_PPC_64K_PAGES + get_paca()->pgdir = NULL; +#endif /* CONFIG_PPC_64K_PAGES */ +} + +#define NO_CONTEXT 0 +#define MAX_CONTEXT (0x100000-1) + +extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +extern void destroy_context(struct mm_struct *mm); + +extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm); +extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); + +/* + * switch_mm is the entry point called from the architecture independent + * code in kernel/sched.c + */ +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (!cpu_isset(smp_processor_id(), next->cpu_vm_mask)) + cpu_set(smp_processor_id(), next->cpu_vm_mask); + + /* No need to flush userspace segments if the mm doesnt change */ +#ifdef CONFIG_PPC_64K_PAGES + if (prev == next && get_paca()->pgdir == next->pgd) + return; +#else + if (prev == next) + return; +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + asm volatile ("dssall"); +#endif /* CONFIG_ALTIVEC */ + + if (cpu_has_feature(CPU_FTR_SLB)) + switch_slb(tsk, next); + else + switch_stab(tsk, next); +} + +#define deactivate_mm(tsk,mm) do { } while (0) + +/* + * After we have set current->mm to a new value, this activates + * the context for the new mm so we see the new mappings. + */ +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm(prev, next, current); + local_irq_restore(flags); +} + +#endif /* CONFIG_PPC64 */ +#endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/include/asm-powerpc/mmzone.h b/include/asm-powerpc/mmzone.h new file mode 100644 index 000000000000..54958d6cae04 --- /dev/null +++ b/include/asm-powerpc/mmzone.h @@ -0,0 +1,50 @@ +/* + * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99 + * + * PowerPC64 port: + * Copyright (C) 2002 Anton Blanchard, IBM Corp. + */ +#ifndef _ASM_MMZONE_H_ +#define _ASM_MMZONE_H_ + +#include <linux/config.h> + +/* + * generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the + * flags field of the struct page + */ + +#ifdef CONFIG_NEED_MULTIPLE_NODES + +extern struct pglist_data *node_data[]; +/* + * Return a pointer to the node data for node n. + */ +#define NODE_DATA(nid) (node_data[nid]) + +/* + * Following are specific to this numa platform. + */ + +extern int numa_cpu_lookup_table[]; +extern cpumask_t numa_cpumask_lookup_table[]; +#ifdef CONFIG_MEMORY_HOTPLUG +extern unsigned long max_pfn; +#endif + +/* + * Following are macros that each numa implmentation must define. + */ + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) + +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +extern int __init early_pfn_to_nid(unsigned long pfn); +#endif + +#endif /* _ASM_MMZONE_H_ */ diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h index c16f106b5373..1e6e7846824f 100644 --- a/include/asm-powerpc/page_64.h +++ b/include/asm-powerpc/page_64.h @@ -86,7 +86,11 @@ static inline void copy_page(void *to, void *from) extern u64 ppc64_pft_size; /* Large pages size */ +#ifdef CONFIG_HUGETLB_PAGE extern unsigned int HPAGE_SHIFT; +#else +#define HPAGE_SHIFT PAGE_SHIFT +#endif #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) diff --git a/include/asm-powerpc/pci-bridge.h b/include/asm-powerpc/pci-bridge.h new file mode 100644 index 000000000000..223ec7bd81da --- /dev/null +++ b/include/asm-powerpc/pci-bridge.h @@ -0,0 +1,153 @@ +#ifndef _ASM_POWERPC_PCI_BRIDGE_H +#define _ASM_POWERPC_PCI_BRIDGE_H + +#ifndef CONFIG_PPC64 +#include <asm-ppc/pci-bridge.h> +#else + +#include <linux/pci.h> +#include <linux/list.h> + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Structure of a PCI controller (host bridge) + */ +struct pci_controller { + struct pci_bus *bus; + char is_dynamic; + void *arch_data; + struct list_head list_node; + + int first_busno; + int last_busno; + + void __iomem *io_base_virt; + unsigned long io_base_phys; + + /* Some machines have a non 1:1 mapping of + * the PCI memory space in the CPU bus space + */ + unsigned long pci_mem_offset; + unsigned long pci_io_size; + + struct pci_ops *ops; + volatile unsigned int __iomem *cfg_addr; + volatile void __iomem *cfg_data; + + /* Currently, we limit ourselves to 1 IO range and 3 mem + * ranges since the common pci_bus structure can't handle more + */ + struct resource io_resource; + struct resource mem_resources[3]; + int global_number; + int local_number; + unsigned long buid; + unsigned long dma_window_base_cur; + unsigned long dma_window_size; +}; + +/* + * PCI stuff, for nodes representing PCI devices, pointed to + * by device_node->data. + */ +struct pci_controller; +struct iommu_table; + +struct pci_dn { + int busno; /* for pci devices */ + int bussubno; /* for pci devices */ + int devfn; /* for pci devices */ + +#ifdef CONFIG_PPC_PSERIES + int eeh_mode; /* See eeh.h for possible EEH_MODEs */ + int eeh_config_addr; + int eeh_check_count; /* # times driver ignored error */ + int eeh_freeze_count; /* # times this device froze up. */ + int eeh_is_bridge; /* device is pci-to-pci bridge */ +#endif + int pci_ext_config_space; /* for pci devices */ + struct pci_controller *phb; /* for pci devices */ + struct iommu_table *iommu_table; /* for phb's or bridges */ + struct pci_dev *pcidev; /* back-pointer to the pci device */ + struct device_node *node; /* back-pointer to the device_node */ +#ifdef CONFIG_PPC_ISERIES + struct list_head Device_List; + int Irq; /* Assigned IRQ */ + int Flags; /* Possible flags(disable/bist)*/ + u8 LogicalSlot; /* Hv Slot Index for Tces */ +#endif + u32 config_space[16]; /* saved PCI config space */ +}; + +/* Get the pointer to a device_node's pci_dn */ +#define PCI_DN(dn) ((struct pci_dn *) (dn)->data) + +struct device_node *fetch_dev_dn(struct pci_dev *dev); + +/* Get a device_node from a pci_dev. This code must be fast except + * in the case where the sysdata is incorrect and needs to be fixed + * up (this will only happen once). + * In this case the sysdata will have been inherited from a PCI host + * bridge or a PCI-PCI bridge further up the tree, so it will point + * to a valid struct pci_dn, just not the one we want. + */ +static inline struct device_node *pci_device_to_OF_node(struct pci_dev *dev) +{ + struct device_node *dn = dev->sysdata; + struct pci_dn *pdn = dn->data; + + if (pdn && pdn->devfn == dev->devfn && pdn->busno == dev->bus->number) + return dn; /* fast path. sysdata is good */ + return fetch_dev_dn(dev); +} + +static inline int pci_device_from_OF_node(struct device_node *np, + u8 *bus, u8 *devfn) +{ + if (!PCI_DN(np)) + return -ENODEV; + *bus = PCI_DN(np)->busno; + *devfn = PCI_DN(np)->devfn; + return 0; +} + +static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus) +{ + if (bus->self) + return pci_device_to_OF_node(bus->self); + else + return bus->sysdata; /* Must be root bus (PHB) */ +} + +extern void pci_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int primary); + +extern int pcibios_remove_root_bus(struct pci_controller *phb); + +extern void phbs_remap_io(void); + +static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus) +{ + struct device_node *busdn = bus->sysdata; + + BUG_ON(busdn == NULL); + return PCI_DN(busdn)->phb; +} + +extern struct pci_controller * +pcibios_alloc_controller(struct device_node *dev); +extern void pcibios_free_controller(struct pci_controller *phb); + +/* Return values for ppc_md.pci_probe_mode function */ +#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */ +#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */ +#define PCI_PROBE_DEVTREE 1 /* Instantiate from device tree */ + +#endif /* CONFIG_PPC64 */ +#endif diff --git a/include/asm-powerpc/pci.h b/include/asm-powerpc/pci.h new file mode 100644 index 000000000000..d5934a076bd0 --- /dev/null +++ b/include/asm-powerpc/pci.h @@ -0,0 +1,247 @@ +#ifndef __ASM_POWERPC_PCI_H +#define __ASM_POWERPC_PCI_H +#ifdef __KERNEL__ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/dma-mapping.h> + +#include <asm/machdep.h> +#include <asm/scatterlist.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> + +#include <asm-generic/pci-dma-compat.h> + +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM 0x10000000 + +struct pci_dev; + +/* Values for the `which' argument to sys_pciconfig_iobase syscall. */ +#define IOBASE_BRIDGE_NUMBER 0 +#define IOBASE_MEMORY 1 +#define IOBASE_IO 2 +#define IOBASE_ISA_IO 3 +#define IOBASE_ISA_MEM 4 + +/* + * Set this to 1 if you want the kernel to re-assign all PCI + * bus numbers + */ +extern int pci_assign_all_buses; +#define pcibios_assign_all_busses() (pci_assign_all_buses) + +#define pcibios_scan_all_fns(a, b) 0 + +static inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + +static inline void pcibios_penalize_isa_irq(int irq, int active) +{ + /* We don't do dynamic PCI IRQ allocation */ +} + +#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + if (ppc_md.pci_get_legacy_ide_irq) + return ppc_md.pci_get_legacy_ide_irq(dev, channel); + return channel ? 15 : 14; +} + +#ifdef CONFIG_PPC64 +#define HAVE_ARCH_PCI_MWI 1 +static inline int pcibios_prep_mwi(struct pci_dev *dev) +{ + /* + * We would like to avoid touching the cacheline size or MWI bit + * but we cant do that with the current pcibios_prep_mwi + * interface. pSeries firmware sets the cacheline size (which is not + * the cpu cacheline size in all cases) and hardware treats MWI + * the same as memory write. So we dont touch the cacheline size + * here and allow the generic code to set the MWI bit. + */ + return 0; +} + +extern struct dma_mapping_ops pci_dma_ops; + +/* For DAC DMA, we currently don't support it by default, but + * we let 64-bit platforms override this. + */ +static inline int pci_dac_dma_supported(struct pci_dev *hwdev,u64 mask) +{ + if (pci_dma_ops.dac_dma_supported) + return pci_dma_ops.dac_dma_supported(&hwdev->dev, mask); + return 0; +} + +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + unsigned long cacheline_size; + u8 byte; + + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); + if (byte == 0) + cacheline_size = 1024; + else + cacheline_size = (int) byte * 4; + + *strat = PCI_DMA_BURST_MULTIPLE; + *strategy_parameter = cacheline_size; +} +#endif + +extern int pci_domain_nr(struct pci_bus *bus); + +/* Decide whether to display the domain number in /proc */ +extern int pci_proc_domain(struct pci_bus *bus); + +#else /* 32-bit */ + +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + *strat = PCI_DMA_BURST_INFINITY; + *strategy_parameter = ~0UL; +} +#endif + +/* + * At present there are very few 32-bit PPC machines that can have + * memory above the 4GB point, and we don't support that. + */ +#define pci_dac_dma_supported(pci_dev, mask) (0) + +/* Return the index of the PCI controller for device PDEV. */ +#define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index + +/* Set the name of the bus as it appears in /proc/bus/pci */ +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return 0; +} + +#endif /* CONFIG_PPC64 */ + +struct vm_area_struct; +/* Map a range of PCI memory or I/O space for a device into user space */ +int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + +/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */ +#define HAVE_PCI_MMAP 1 + +#ifdef CONFIG_PPC64 +/* pci_unmap_{single,page} is not a nop, thus... */ +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) + +/* The PCI address space does not equal the physical memory address + * space (we have an IOMMU). The IDE and SCSI device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (0) + +#else /* 32-bit */ + +/* The PCI address space does equal the physical memory + * address space (no IOMMU). The IDE and SCSI device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (1) + +/* pci_unmap_{page,single} is a nop so... */ +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) +#define pci_unmap_addr(PTR, ADDR_NAME) (0) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) +#define pci_unmap_len(PTR, LEN_NAME) (0) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) + +#endif /* CONFIG_PPC64 */ + +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, + struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, + struct pci_bus_region *region); + +static inline struct resource *pcibios_select_root(struct pci_dev *pdev, + struct resource *res) +{ + struct resource *root = NULL; + + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + + return root; +} + +extern int unmap_bus_range(struct pci_bus *bus); + +extern int remap_bus_range(struct pci_bus *bus); + +extern void pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus); + +extern struct pci_controller *init_phb_dynamic(struct device_node *dn); + +extern struct pci_dev *of_create_pci_dev(struct device_node *node, + struct pci_bus *bus, int devfn); + +extern void of_scan_pci_bridge(struct device_node *node, + struct pci_dev *dev); + +extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); + +extern int pci_read_irq_line(struct pci_dev *dev); + +extern void pcibios_add_platform_entries(struct pci_dev *dev); + +struct file; +extern pgprot_t pci_phys_mem_access_prot(struct file *file, + unsigned long pfn, + unsigned long size, + pgprot_t prot); + +#if defined(CONFIG_PPC_MULTIPLATFORM) || defined(CONFIG_PPC32) +#define HAVE_ARCH_PCI_RESOURCE_TO_USER +extern void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + u64 *start, u64 *end); +#endif /* CONFIG_PPC_MULTIPLATFORM || CONFIG_PPC32 */ + +#endif /* __KERNEL__ */ +#endif /* __ASM_POWERPC_PCI_H */ diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h new file mode 100644 index 000000000000..bfc2113b3630 --- /dev/null +++ b/include/asm-powerpc/pgalloc.h @@ -0,0 +1,156 @@ +#ifndef _ASM_POWERPC_PGALLOC_H +#define _ASM_POWERPC_PGALLOC_H + +#ifndef CONFIG_PPC64 +#include <asm-ppc/pgalloc.h> +#else + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/cpumask.h> +#include <linux/percpu.h> + +extern kmem_cache_t *pgtable_cache[]; + +#ifdef CONFIG_PPC_64K_PAGES +#define PTE_CACHE_NUM 0 +#define PMD_CACHE_NUM 1 +#define PGD_CACHE_NUM 2 +#else +#define PTE_CACHE_NUM 0 +#define PMD_CACHE_NUM 1 +#define PUD_CACHE_NUM 1 +#define PGD_CACHE_NUM 0 +#endif + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); +} + +static inline void pgd_free(pgd_t *pgd) +{ + kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); +} + +#ifndef CONFIG_PPC_64K_PAGES + +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(pud_t *pud) +{ + kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, (unsigned long)pmd); +} + +#define pmd_populate(mm, pmd, pte_page) \ + pmd_populate_kernel(mm, pmd, page_address(pte_page)) +#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte)) + + +#else /* CONFIG_PPC_64K_PAGES */ + +#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, + pte_t *pte) +{ + pmd_set(pmd, (unsigned long)pte); +} + +#define pmd_populate(mm, pmd, pte_page) \ + pmd_populate_kernel(mm, pmd, page_address(pte_page)) + +#endif /* CONFIG_PPC_64K_PAGES */ + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pmd_free(pmd_t *pmd) +{ + kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); +} + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); +} + +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + return virt_to_page(pte_alloc_one_kernel(mm, address)); +} + +static inline void pte_free_kernel(pte_t *pte) +{ + kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); +} + +static inline void pte_free(struct page *ptepage) +{ + pte_free_kernel(page_address(ptepage)); +} + +#define PGF_CACHENUM_MASK 0xf + +typedef struct pgtable_free { + unsigned long val; +} pgtable_free_t; + +static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, + unsigned long mask) +{ + BUG_ON(cachenum > PGF_CACHENUM_MASK); + + return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum}; +} + +static inline void pgtable_free(pgtable_free_t pgf) +{ + void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); + int cachenum = pgf.val & PGF_CACHENUM_MASK; + + kmem_cache_free(pgtable_cache[cachenum], p); +} + +extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); + +#define __pte_free_tlb(tlb, ptepage) \ + pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ + PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) +#define __pmd_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ + PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) +#ifndef CONFIG_PPC_64K_PAGES +#define __pud_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ + PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) +#endif /* CONFIG_PPC_64K_PAGES */ + +#define check_pgt_cache() do { } while (0) + +#endif /* CONFIG_PPC64 */ +#endif /* _ASM_POWERPC_PGALLOC_H */ diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h new file mode 100644 index 000000000000..e9590c06ad92 --- /dev/null +++ b/include/asm-powerpc/pgtable-4k.h @@ -0,0 +1,91 @@ +/* + * Entries per page directory level. The PTE level must use a 64b record + * for each page table entry. The PMD and PGD level use a 32b record for + * each entry by assuming that each entry is page aligned. + */ +#define PTE_INDEX_SIZE 9 +#define PMD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 7 +#define PGD_INDEX_SIZE 9 + +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* With 4k base page size, hugepage PTEs go at the PMD level */ +#define MIN_HUGEPTE_SHIFT PMD_SHIFT + +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* PTE bits */ +#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */ +#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */ +#define _PAGE_F_SECOND _PAGE_SECONDARY +#define _PAGE_F_GIX _PAGE_GROUP_IX + +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \ + _PAGE_SECONDARY | _PAGE_GROUP_IX) + +/* PAGE_MASK gives the right answer below, but only by accident */ +/* It should be preserving the high 48 bits and then specifically */ +/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_HPTEFLAGS) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0 +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0 +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 + +/* shift to put page number into pte */ +#define PTE_RPN_SHIFT (17) + +#define __real_pte(e,p) ((real_pte_t)(e)) +#define __rpte_to_pte(r) (r) +#define __rpte_to_hidx(r,index) (pte_val((r)) >> 12) + +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + index = 0; \ + shift = mmu_psize_defs[psize].shift; \ + +#define pte_iterate_hashed_end() } while(0) + +/* + * 4-level page tables related bits + */ + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (pgd_val(pgd) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) +#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) + +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page(*(pgdp))) + \ + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + +#define pud_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) diff --git a/include/asm-powerpc/pgtable-64k.h b/include/asm-powerpc/pgtable-64k.h new file mode 100644 index 000000000000..154f1840ece4 --- /dev/null +++ b/include/asm-powerpc/pgtable-64k.h @@ -0,0 +1,90 @@ +#include <asm-generic/pgtable-nopud.h> + + +#define PTE_INDEX_SIZE 12 +#define PMD_INDEX_SIZE 12 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE 4 + +#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* With 4k base page size, hugepage PTEs go at the PMD level */ +#define MIN_HUGEPTE_SHIFT PAGE_SHIFT + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* Additional PTE bits (don't change without checking asm in hash_low.S) */ +#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */ +#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */ +#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */ +#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ +#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */ + +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_HPTE_SUB |\ + _PAGE_COMBO) + +/* Shift to put page number into pte. + * + * That gives us a max RPN of 32 bits, which means a max of 48 bits + * of addressable physical space. + * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but + * 32 makes PTEs more readable for debugging for now :) + */ +#define PTE_RPN_SHIFT (32) +#define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT)) +#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1)) + +/* _PAGE_CHG_MASK masks of bits that are to be preserved accross + * pgprot changes + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ + _PAGE_ACCESSED) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0x1ff +/* Bits to mask out from a PGD/PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0x1ff + +#ifndef __ASSEMBLY__ + +/* Manipulate "rpte" values */ +#define __real_pte(e,p) ((real_pte_t) { \ + (e), pte_val(*((p) + PTRS_PER_PTE)) }) +#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ + (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __rpte_to_pte(r) ((r).pte) +#define __rpte_sub_valid(rpte, index) \ + (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) + + +/* Trick: we set __end to va + 64k, which happens works for + * a 16M page as well as we want only one iteration + */ +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + unsigned long __end = va + PAGE_SIZE; \ + unsigned __split = (psize == MMU_PAGE_4K || \ + psize == MMU_PAGE_64K_AP); \ + shift = mmu_psize_defs[psize].shift; \ + for (index = 0; va < __end; index++, va += (1 << shift)) { \ + if (!__split || __rpte_sub_valid(rpte, index)) do { \ + +#define pte_iterate_hashed_end() } while(0); } } while(0) + + +#endif /* __ASSEMBLY__ */ diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h new file mode 100644 index 000000000000..0303f57366c1 --- /dev/null +++ b/include/asm-powerpc/pgtable.h @@ -0,0 +1,524 @@ +#ifndef _ASM_POWERPC_PGTABLE_H +#define _ASM_POWERPC_PGTABLE_H + +#ifndef CONFIG_PPC64 +#include <asm-ppc/pgtable.h> +#else + +/* + * This file contains the functions and defines necessary to modify and use + * the ppc64 hashed page table. + */ + +#ifndef __ASSEMBLY__ +#include <linux/config.h> +#include <linux/stddef.h> +#include <asm/processor.h> /* For TASK_SIZE */ +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +struct mm_struct; +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_PPC_64K_PAGES +#include <asm/pgtable-64k.h> +#else +#include <asm/pgtable-4k.h> +#endif + +#define FIRST_USER_ADDRESS 0 + +/* + * Size of EA range mapped by our pagetables. + */ +#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) +#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) + +#if TASK_SIZE_USER64 > PGTABLE_RANGE +#error TASK_SIZE_USER64 exceeds pagetable range +#endif + +#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#error TASK_SIZE_USER64 exceeds user VSID range +#endif + +/* + * Define the address range of the vmalloc VM area. + */ +#define VMALLOC_START (0xD000000000000000ul) +#define VMALLOC_SIZE (0x80000000000UL) +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) + +/* + * Define the address range of the imalloc VM area. + */ +#define PHBS_IO_BASE VMALLOC_END +#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ +#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) + +/* + * Common bits in a linux-style PTE. These match the bits in the + * (hardware-defined) PowerPC PTE as closely as possible. Additional + * bits may be defined in pgtable-*.h + */ +#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ +#define _PAGE_USER 0x0002 /* matches one of the PP bits */ +#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */ +#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */ +#define _PAGE_GUARDED 0x0008 +#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */ +#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */ +#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */ +#define _PAGE_DIRTY 0x0080 /* C: page changed */ +#define _PAGE_ACCESSED 0x0100 /* R: page referenced */ +#define _PAGE_RW 0x0200 /* software: user write access allowed */ +#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */ +#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ + +#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT) + +#define _PAGE_WRENABLE (_PAGE_RW | _PAGE_DIRTY) + +/* __pgprot defined in asm-powerpc/page.h */ +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) + +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_USER) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_USER | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE) +#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \ + _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC) + +#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE) +#define HAVE_PAGE_AGP + +/* PTEIDX nibble */ +#define _PTEIDX_SECONDARY 0x8 +#define _PTEIDX_GROUP_IX 0x7 + + +/* + * POWER4 and newer have per page execute protection, older chips can only + * do this on a segment (256MB) basis. + * + * Also, write permissions imply read permissions. + * This is the closest we can get.. + * + * Note due to the way vm flags are laid out, the bits are XWR + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_X +#define __P101 PAGE_READONLY_X +#define __P110 PAGE_COPY_X +#define __P111 PAGE_COPY_X + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_X +#define __S101 PAGE_READONLY_X +#define __S110 PAGE_SHARED_X +#define __S111 PAGE_SHARED_X + +#ifndef __ASSEMBLY__ + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_HUGETLB_PAGE + +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#endif + +#ifndef __ASSEMBLY__ + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * mk_pte takes a (struct page *) as input + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) +{ + pte_t pte; + + + pte_val(pte) = (pfn << PTE_RPN_SHIFT) | pgprot_val(pgprot); + return pte; +} + +#define pte_modify(_pte, newprot) \ + (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))) + +#define pte_none(pte) ((pte_val(pte) & ~_PAGE_HPTEFLAGS) == 0) +#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) + +/* pte_clear moved to later in this file */ + +#define pte_pfn(x) ((unsigned long)((pte_val(x)>>PTE_RPN_SHIFT))) +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval)) +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) (pmd_val(pmd) == 0) +#define pmd_present(pmd) (pmd_val(pmd) != 0) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) +#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) + +#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) +#define pud_none(pud) (!pud_val(pud)) +#define pud_bad(pud) ((pud_val(pud)) == 0) +#define pud_present(pud) (pud_val(pud) != 0) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0) +#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS) + +#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) + +/* + * Find an entry in a page-table-directory. We combine the address region + * (the high order N bits) and the pgd portion of the address. + */ +/* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff) + +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +#define pmd_offset(pudp,addr) \ + (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + +#define pte_offset_kernel(dir,addr) \ + (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while(0) +#define pte_unmap_nested(pte) do { } while(0) + +/* to find an entry in a kernel page-table-directory */ +/* This now only contains the vmalloc pages */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER;} +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW;} +static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;} +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} +static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;} + +static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } +static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } + +static inline pte_t pte_rdprotect(pte_t pte) { + pte_val(pte) &= ~_PAGE_USER; return pte; } +static inline pte_t pte_exprotect(pte_t pte) { + pte_val(pte) &= ~_PAGE_EXEC; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) { + pte_val(pte) &= ~(_PAGE_RW); return pte; } +static inline pte_t pte_mkclean(pte_t pte) { + pte_val(pte) &= ~(_PAGE_DIRTY); return pte; } +static inline pte_t pte_mkold(pte_t pte) { + pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkread(pte_t pte) { + pte_val(pte) |= _PAGE_USER; return pte; } +static inline pte_t pte_mkexec(pte_t pte) { + pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { + pte_val(pte) |= _PAGE_RW; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { + pte_val(pte) |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { + pte_val(pte) |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkhuge(pte_t pte) { + return pte; } + +/* Atomic PTE updates */ +static inline unsigned long pte_update(pte_t *p, unsigned long clr) +{ + unsigned long old, tmp; + + __asm__ __volatile__( + "1: ldarx %0,0,%3 # pte_update\n\ + andi. %1,%0,%6\n\ + bne- 1b \n\ + andc %1,%0,%4 \n\ + stdcx. %1,0,%3 \n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) + : "r" (p), "r" (clr), "m" (*p), "i" (_PAGE_BUSY) + : "cc" ); + return old; +} + +/* PTE updating functions, this function puts the PTE in the + * batch, doesn't actually triggers the hash flush immediately, + * you need to call flush_tlb_pending() to do that. + * Pass -1 for "normal" size (4K or 64K) + */ +extern void hpte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long pte, int huge); + +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old; + + if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + return 0; + old = pte_update(ptep, _PAGE_ACCESSED); + if (old & _PAGE_HASHPTE) { + hpte_update(mm, addr, ptep, old, 0); + flush_tlb_pending(); + } + return (old & _PAGE_ACCESSED) != 0; +} +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ +({ \ + int __r; \ + __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ + __r; \ +}) + +/* + * On RW/DIRTY bit transitions we can avoid flushing the hpte. For the + * moment we always flush but we need to fix hpte_update and test if the + * optimisation is worth it. + */ +static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old; + + if ((pte_val(*ptep) & _PAGE_DIRTY) == 0) + return 0; + old = pte_update(ptep, _PAGE_DIRTY); + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, ptep, old, 0); + return (old & _PAGE_DIRTY) != 0; +} +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +#define ptep_test_and_clear_dirty(__vma, __addr, __ptep) \ +({ \ + int __r; \ + __r = __ptep_test_and_clear_dirty((__vma)->vm_mm, __addr, __ptep); \ + __r; \ +}) + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long old; + + if ((pte_val(*ptep) & _PAGE_RW) == 0) + return; + old = pte_update(ptep, _PAGE_RW); + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, ptep, old, 0); +} + +/* + * We currently remove entries from the hashtable regardless of whether + * the entry was young or dirty. The generic routines only flush if the + * entry was young or dirty which is not good enough. + * + * We should be more intelligent about this but for the moment we override + * these functions and force a tlb flush unconditionally + */ +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young(__vma, __address, __ptep) \ +({ \ + int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \ + __ptep); \ + __young; \ +}) + +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH +#define ptep_clear_flush_dirty(__vma, __address, __ptep) \ +({ \ + int __dirty = __ptep_test_and_clear_dirty((__vma)->vm_mm, __address, \ + __ptep); \ + flush_tlb_page(__vma, __address); \ + __dirty; \ +}) + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old = pte_update(ptep, ~0UL); + + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, ptep, old, 0); + return __pte(old); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t * ptep) +{ + unsigned long old = pte_update(ptep, ~0UL); + + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, ptep, old, 0); +} + +/* + * set_pte stores a linux PTE into the linux page table. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (pte_present(*ptep)) { + pte_clear(mm, addr, ptep); + flush_tlb_pending(); + } + pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + +#ifdef CONFIG_PPC_64K_PAGES + if (mmu_virtual_psize != MMU_PAGE_64K) + pte = __pte(pte_val(pte) | _PAGE_COMBO); +#endif /* CONFIG_PPC_64K_PAGES */ + + *ptep = pte; +} + +/* Set the dirty and/or accessed bits atomically in a linux PTE, this + * function doesn't need to flush the hash entry + */ +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty) +{ + unsigned long bits = pte_val(entry) & + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); + unsigned long old, tmp; + + __asm__ __volatile__( + "1: ldarx %0,0,%4\n\ + andi. %1,%0,%6\n\ + bne- 1b \n\ + or %0,%3,%0\n\ + stdcx. %0,0,%4\n\ + bne- 1b" + :"=&r" (old), "=&r" (tmp), "=m" (*ptep) + :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY) + :"cc"); +} +#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ + do { \ + __ptep_set_access_flags(__ptep, __entry, __dirty); \ + flush_tlb_page_nohash(__vma, __address); \ + } while(0) + +/* + * Macro to mark a page protection value as "uncacheable". + */ +#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED)) + +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + +#define __HAVE_ARCH_PTE_SAME +#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +extern pgd_t swapper_pg_dir[]; + +extern void paging_init(void); + +#ifdef CONFIG_HUGETLB_PAGE +#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ + free_pgd_range(tlb, addr, end, floor, ceiling) +#endif + +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to put a corresponding HPTE into the hash table + * ahead of time, instead of waiting for the inevitable extra + * hash-table miss exception. + */ +struct vm_area_struct; +extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); + +/* Encode and de-code a swap entry */ +#define __swp_type(entry) (((entry).val >> 1) & 0x3f) +#define __swp_offset(entry) ((entry).val >> 8) +#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)}) +#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT}) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT }) +#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT) +#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE}) +#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT) + +/* + * kern_addr_valid is intended to indicate whether an address is a valid + * kernel address. Most 32-bit archs define it as always true (like this) + * but most 64-bit archs actually perform a test. What should we do here? + * The only use is in fs/ncpfs/dir.c + */ +#define kern_addr_valid(addr) (1) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +void pgtable_cache_init(void); + +/* + * find_linux_pte returns the address of a linux pte for a given + * effective address and directory. If not found, it returns zero. + */static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt = NULL; + + pg = pgdir + pgd_index(ea); + if (!pgd_none(*pg)) { + pu = pud_offset(pg, ea); + if (!pud_none(*pu)) { + pm = pmd_offset(pu, ea); + if (pmd_present(*pm)) + pt = pte_offset_kernel(pm, ea); + } + } + return pt; +} + +#include <asm-generic/pgtable.h> + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_PPC64 */ +#endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/include/asm-powerpc/ppc-pci.h b/include/asm-powerpc/ppc-pci.h index 2e36e5a7f4f3..36cdc869e580 100644 --- a/include/asm-powerpc/ppc-pci.h +++ b/include/asm-powerpc/ppc-pci.h @@ -48,8 +48,6 @@ extern void pSeries_final_fixup(void); extern void pSeries_irq_bus_setup(struct pci_bus *bus); extern unsigned long pci_probe_only; -extern unsigned long pci_assign_all_buses; -extern int pci_read_irq_line(struct pci_dev *pci_dev); /* ---- EEH internal-use-only related routines ---- */ #ifdef CONFIG_EEH diff --git a/include/asm-powerpc/spinlock.h b/include/asm-powerpc/spinlock.h new file mode 100644 index 000000000000..caa4b14e0e94 --- /dev/null +++ b/include/asm-powerpc/spinlock.h @@ -0,0 +1,269 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +/* + * Simple spin lock operations. + * + * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM + * Rework to support virtual processors + * + * Type of int is used as a full 64b word is not necessary. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * (the type definitions are in asm/spinlock_types.h) + */ +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/hvcall.h> +#include <asm/iseries/hv_call.h> +#endif +#include <asm/asm-compat.h> +#include <asm/synch.h> + +#define __raw_spin_is_locked(x) ((x)->slock != 0) + +#ifdef CONFIG_PPC64 +/* use 0x800000yy when locked, where yy == CPU number */ +#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) +#else +#define LOCK_TOKEN 1 +#endif + +/* + * This returns the old value in the lock, so we succeeded + * in getting the lock if the return value is 0. + */ +static __inline__ unsigned long __spin_trylock(raw_spinlock_t *lock) +{ + unsigned long tmp, token; + + token = LOCK_TOKEN; + __asm__ __volatile__( +"1: lwarx %0,0,%2 # __spin_trylock\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n\ + stwcx. %1,0,%2\n\ + bne- 1b\n\ + isync\n\ +2:" : "=&r" (tmp) + : "r" (token), "r" (&lock->slock) + : "cr0", "memory"); + + return tmp; +} + +static int __inline__ __raw_spin_trylock(raw_spinlock_t *lock) +{ + return __spin_trylock(lock) == 0; +} + +/* + * On a system with shared processors (that is, where a physical + * processor is multiplexed between several virtual processors), + * there is no point spinning on a lock if the holder of the lock + * isn't currently scheduled on a physical processor. Instead + * we detect this situation and ask the hypervisor to give the + * rest of our timeslice to the lock holder. + * + * So that we can tell which virtual processor is holding a lock, + * we put 0x80000000 | smp_processor_id() in the lock when it is + * held. Conveniently, we have a word in the paca that holds this + * value. + */ + +#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) +/* We only yield to the hypervisor if we are in shared processor mode */ +#define SHARED_PROCESSOR (get_paca()->lppaca.shared_proc) +extern void __spin_yield(raw_spinlock_t *lock); +extern void __rw_yield(raw_rwlock_t *lock); +#else /* SPLPAR || ISERIES */ +#define __spin_yield(x) barrier() +#define __rw_yield(x) barrier() +#define SHARED_PROCESSOR 0 +#endif + +static void __inline__ __raw_spin_lock(raw_spinlock_t *lock) +{ + while (1) { + if (likely(__spin_trylock(lock) == 0)) + break; + do { + HMT_low(); + if (SHARED_PROCESSOR) + __spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + } +} + +static void __inline__ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +{ + unsigned long flags_dis; + + while (1) { + if (likely(__spin_trylock(lock) == 0)) + break; + local_save_flags(flags_dis); + local_irq_restore(flags); + do { + HMT_low(); + if (SHARED_PROCESSOR) + __spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + local_irq_restore(flags_dis); + } +} + +static __inline__ void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __asm__ __volatile__(SYNC_ON_SMP" # __raw_spin_unlock" + : : :"memory"); + lock->slock = 0; +} + +#ifdef CONFIG_PPC64 +extern void __raw_spin_unlock_wait(raw_spinlock_t *lock); +#else +#define __raw_spin_unlock_wait(lock) \ + do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) +#endif + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +#define __raw_read_can_lock(rw) ((rw)->lock >= 0) +#define __raw_write_can_lock(rw) (!(rw)->lock) + +#ifdef CONFIG_PPC64 +#define __DO_SIGN_EXTEND "extsw %0,%0\n" +#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ +#else +#define __DO_SIGN_EXTEND +#define WRLOCK_TOKEN (-1) +#endif + +/* + * This returns the old value in the lock + 1, + * so we got a read lock if the return value is > 0. + */ +static long __inline__ __read_trylock(raw_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # read_trylock\n" + __DO_SIGN_EXTEND +" addic. %0,%0,1\n\ + ble- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b\n\ + isync\n\ +2:" : "=&r" (tmp) + : "r" (&rw->lock) + : "cr0", "xer", "memory"); + + return tmp; +} + +/* + * This returns the old value in the lock, + * so we got the write lock if the return value is 0. + */ +static __inline__ long __write_trylock(raw_rwlock_t *rw) +{ + long tmp, token; + + token = WRLOCK_TOKEN; + __asm__ __volatile__( +"1: lwarx %0,0,%2 # write_trylock\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %1,0,%2\n\ + bne- 1b\n\ + isync\n\ +2:" : "=&r" (tmp) + : "r" (token), "r" (&rw->lock) + : "cr0", "memory"); + + return tmp; +} + +static void __inline__ __raw_read_lock(raw_rwlock_t *rw) +{ + while (1) { + if (likely(__read_trylock(rw) > 0)) + break; + do { + HMT_low(); + if (SHARED_PROCESSOR) + __rw_yield(rw); + } while (unlikely(rw->lock < 0)); + HMT_medium(); + } +} + +static void __inline__ __raw_write_lock(raw_rwlock_t *rw) +{ + while (1) { + if (likely(__write_trylock(rw) == 0)) + break; + do { + HMT_low(); + if (SHARED_PROCESSOR) + __rw_yield(rw); + } while (unlikely(rw->lock != 0)); + HMT_medium(); + } +} + +static int __inline__ __raw_read_trylock(raw_rwlock_t *rw) +{ + return __read_trylock(rw) > 0; +} + +static int __inline__ __raw_write_trylock(raw_rwlock_t *rw) +{ + return __write_trylock(rw) == 0; +} + +static void __inline__ __raw_read_unlock(raw_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( + "eieio # read_unlock\n\ +1: lwarx %0,0,%1\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r"(tmp) + : "r"(&rw->lock) + : "cr0", "memory"); +} + +static __inline__ void __raw_write_unlock(raw_rwlock_t *rw) +{ + __asm__ __volatile__(SYNC_ON_SMP" # write_unlock" + : : :"memory"); + rw->lock = 0; +} + +#endif /* __ASM_SPINLOCK_H */ diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 015d28746e1b..db8095cbe09b 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -41,6 +41,10 @@ static inline int node_to_first_cpu(int node) .cache_hot_time = (10*1000000), \ .cache_nice_tries = 1, \ .per_cpu_gain = 100, \ + .busy_idx = 3, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ |