/* * Timer device implementation for SGI SN platforms. * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * * Copyright (c) 2001-2006 Silicon Graphics, Inc. All rights reserved. * * This driver exports an API that should be supportable by any HPET or IA-PC * multimedia timer. The code below is currently specific to the SGI Altix * SHub RTC, however. * * 11/01/01 - jbarnes - initial revision * 9/10/04 - Christoph Lameter - remove interrupt support for kernel inclusion * 10/1/04 - Christoph Lameter - provide posix clock CLOCK_SGI_CYCLE * 10/13/04 - Christoph Lameter, Dimitri Sivanich - provide timer interrupt * support via the posix timer interface */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Jesse Barnes "); MODULE_DESCRIPTION("SGI Altix RTC Timer"); MODULE_LICENSE("GPL"); /* name of the device, usually in /dev */ #define MMTIMER_NAME "mmtimer" #define MMTIMER_DESC "SGI Altix RTC Timer" #define MMTIMER_VERSION "2.1" #define RTC_BITS 55 /* 55 bits for this implementation */ extern unsigned long sn_rtc_cycles_per_second; #define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC)) #define rtc_time() (*RTC_COUNTER_ADDR) static int mmtimer_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int mmtimer_mmap(struct file *file, struct vm_area_struct *vma); /* * Period in femtoseconds (10^-15 s) */ static unsigned long mmtimer_femtoperiod = 0; static struct file_operations mmtimer_fops = { .owner = THIS_MODULE, .mmap = mmtimer_mmap, .ioctl = mmtimer_ioctl, }; /* * We only have comparison registers RTC1-4 currently available per * node. RTC0 is used by SAL. */ #define NUM_COMPARATORS 3 /* Check for an RTC interrupt pending */ static int inline mmtimer_int_pending(int comparator) { if (HUB_L((unsigned long *)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)) & SH_EVENT_OCCURRED_RTC1_INT_MASK << comparator) return 1; else return 0; } /* Clear the RTC interrupt pending bit */ static void inline mmtimer_clr_int_pending(int comparator) { HUB_S((u64 *)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), SH_EVENT_OCCURRED_RTC1_INT_MASK << comparator); } /* Setup timer on comparator RTC1 */ static void inline mmtimer_setup_int_0(u64 expires) { u64 val; /* Disable interrupt */ HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC1_INT_ENABLE), 0UL); /* Initialize comparator value */ HUB_S((u64 *)LOCAL_MMR_ADDR(SH_INT_CMPB), -1L); /* Clear pending bit */ mmtimer_clr_int_pending(0); val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC1_INT_CONFIG_IDX_SHFT) | ((u64)cpu_physical_id(smp_processor_id()) << SH_RTC1_INT_CONFIG_PID_SHFT); /* Set configuration */ HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC1_INT_CONFIG), val); /* Enable RTC interrupts */ HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC1_INT_ENABLE), 1UL); /* Initialize comparator value */ HUB_S((u64 *)LOCAL_MMR_ADDR(SH_INT_CMPB), expires); } /* Setup timer on comparator RTC2 */ static void inline mmtimer_setup_int_1(u64 expires) { u64 val; HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC2_INT_ENABLE), 0UL); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_INT_CMPC), -1L); mmtimer_clr_int_pending(1); val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC2_INT_CONFIG_IDX_SHFT) | ((u64)cpu_physical_id(smp_processor_id()) << SH_RTC2_INT_CONFIG_PID_SHFT); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC2_INT_CONFIG), val); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC2_INT_ENABLE), 1UL); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_INT_CMPC), expires); } /* Setup timer on comparator RTC3 */ static void inline mmtimer_setup_int_2(u64 expires) { u64 val; HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC3_INT_ENABLE), 0UL); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_INT_CMPD), -1L); mmtimer_clr_int_pending(2); val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC3_INT_CONFIG_IDX_SHFT) | ((u64)cpu_physical_id(smp_processor_id()) << SH_RTC3_INT_CONFIG_PID_SHFT); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC3_INT_CONFIG), val); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC3_INT_ENABLE), 1UL); HUB_S((u64 *)LOCAL_MMR_ADDR(SH_INT_CMPD), expires); } /* * This function must be called with interrupts disabled and preemption off * in order to insure that the setup succeeds in a deterministic time frame. * It will check if the interrupt setup succeeded. */ static int inline mmtimer_setup(int comparator, unsigned long expires) { switch (comparator) { case 0: mmtimer_setup_int_0(expires); break; case 1: mmtimer_setup_int_1(expires); break; case 2: mmtimer_setup_int_2(expires); break; } /* We might've missed our expiration time */ if (rtc_time() < expires) return 1; /* * If an interrupt is already pending then its okay * if not then we failed */ return mmtimer_int_pending(comparator); } static int inline mmtimer_disable_int(long nasid, int comparator) { switch (comparator) { case 0: nasid == -1 ? HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC1_INT_ENABLE), 0UL) : REMOTE_HUB_S(nasid, SH_RTC1_INT_ENABLE, 0UL); break; case 1: nasid == -1 ? HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC2_INT_ENABLE), 0UL) : REMOTE_HUB_S(nasid, SH_RTC2_INT_ENABLE, 0UL); break; case 2: nasid == -1 ? HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC3_INT_ENABLE), 0UL) : REMOTE_HUB_S(nasid, SH_RTC3_INT_ENABLE, 0UL); break; default: return -EFAULT; } return 0; } #define TIMER_OFF 0xbadcabLL /* There is one of these for each comparator */ typedef struct mmtimer { spinlock_t lock ____cacheline_aligned; struct k_itimer *timer; int i; int cpu; struct tasklet_struct tasklet; } mmtimer_t; static mmtimer_t ** timers; /** * mmtimer_ioctl - ioctl interface for /dev/mmtimer * @inode: inode of the device * @file: file structure for the device * @cmd: command to execute * @arg: optional argument to command * * Executes the command specified by @cmd. Returns 0 for success, < 0 for * failure. * * Valid commands: * * %MMTIMER_GETOFFSET - Should return the offset (relative to the start * of the page where the registers are mapped) for the counter in question. * * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15) * seconds * * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address * specified by @arg * * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter * * %MMTIMER_MMAPAVAIL - Returns 1 if the registers can be mmap'd into userspace * * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it * in the address specified by @arg. */ static int mmtimer_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { int ret = 0; switch (cmd) { case MMTIMER_GETOFFSET: /* offset of the counter */ /* * SN RTC registers are on their own 64k page */ if(PAGE_SIZE <= (1 << 16)) ret = (((long)RTC_COUNTER_ADDR) & (PAGE_SIZE-1)) / 8; else ret = -ENOSYS; break; case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ if(copy_to_user((unsigned long __user *)arg, &mmtimer_femtoperiod, sizeof(unsigned long))) return -EFAULT; break; case MMTIMER_GETFREQ: /* frequency in Hz */ if(copy_to_user((unsigned long __user *)arg, &sn_rtc_cycles_per_second, sizeof(unsigned long))) return -EFAULT; ret = 0; break; case MMTIMER_GETBITS: /* number of bits in the clock */ ret = RTC_BITS; break; case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; break; case MMTIMER_GETCOUNTER: if(copy_to_user((unsigned long __user *)arg, RTC_COUNTER_ADDR, sizeof(unsigned long))) return -EFAULT; break; default: ret = -ENOSYS; break; } return ret; } /** * mmtimer_mmap - maps the clock's registers into userspace * @file: file structure for the device * @vma: VMA to map the registers into * * Calls remap_pfn_range() to map the clock's registers into * the calling process' address space. */ static int mmtimer_mmap(struct file *file, struct vm_area_struct *vma) { unsigned long mmtimer_addr; if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; if (vma->vm_flags & VM_WRITE) return -EPERM; if (PAGE_SIZE > (1 << 16)) return -ENOSYS; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); mmtimer_addr = __pa(RTC_COUNTER_ADDR); mmtimer_addr &= ~(PAGE_SIZE - 1); mmtimer_addr &= 0xfffffffffffffffUL; if (remap_pfn_range(vma, vma->vm_start, mmtimer_addr >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) { printk(KERN_ERR "remap_pfn_range failed in mmtimer.c\n"); return -EAGAIN; } return 0; } static struct miscdevice mmtimer_miscdev = { SGI_MMTIMER, MMTIMER_NAME, &mmtimer_fops }; static struct timespec sgi_clock_offset; static int sgi_clock_period; /* * Posix Timer Interface */ static struct timespec sgi_clock_offset; static int sgi_clock_period; static int sgi_clock_get(clockid_t clockid, struct timespec *tp) { u64 nsec; nsec = rtc_time() * sgi_clock_period + sgi_clock_offset.tv_nsec; tp->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tp->tv_nsec) + sgi_clock_offset.tv_sec; return 0; }; static int sgi_clock_set(clockid_t clockid, struct timespec *tp) { u64 nsec; u64 rem; nsec = rtc_time() * sgi_clock_period; sgi_clock_offset.tv_sec = tp->tv_sec - div_long_long_rem(nsec, NSEC_PER_SEC, &rem); if (rem <= tp->tv_nsec) sgi_clock_offset.tv_nsec = tp->tv_sec - rem; else { sgi_clock_offset.tv_nsec = tp->tv_sec + NSEC_PER_SEC - rem; sgi_clock_offset.tv_sec--; } return 0; } /* * Schedule the next periodic interrupt. This function will attempt * to schedule a periodic interrupt later if necessary. If the scheduling * of an interrupt fails then the time to skip is lengthened * exponentially in order to ensure that the next interrupt * can be properly scheduled.. */ static int inline reschedule_periodic_timer(mmtimer_t *x) { int n; struct k_itimer *t = x->timer; t->it.mmtimer.clock = x->i; t->it_overrun--; n = 0; do { t->it.mmtimer.expires += t->it.mmtimer.incr << n; t->it_overrun += 1 << n; n++; if (n > 20) return 1; } while (!mmtimer_setup(x->i, t->it.mmtimer.expires)); return 0; } /** * mmtimer_interrupt - timer interrupt handler * @irq: irq received * @dev_id: device the irq came from * @regs: register state upon receipt of the interrupt * * Called when one of the comarators matches the counter, This * routine will send signals to processes that have requested * them. * * This interrupt is run in an interrupt context * by the SHUB. It is therefore safe to locally access SHub * registers. */ static irqreturn_t mmtimer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { int i; unsigned long expires = 0; int result = IRQ_NONE; unsigned indx = cpu_to_node(smp_processor_id()); /* * Do this once for each comparison register */ for (i = 0; i < NUM_COMPARATORS; i++) { mmtimer_t *base = timers[indx] + i; /* Make sure this doesn't get reused before tasklet_sched */ spin_lock(&base->lock); if (base->cpu == smp_processor_id()) { if (base->timer) expires = base->timer->it.mmtimer.expires; /* expires test won't work with shared irqs */ if ((mmtimer_int_pending(i) > 0) || (expires && (expires < rtc_time()))) { mmtimer_clr_int_pending(i); tasklet_schedule(&base->tasklet); result = IRQ_HANDLED; } } spin_unlock(&base->lock); expires = 0; } return result; } void mmtimer_tasklet(unsigned long data) { mmtimer_t *x = (mmtimer_t *)data; struct k_itimer *t = x->timer; unsigned long flags; if (t == NULL) return; /* Send signal and deal with periodic signals */ spin_lock_irqsave(&t->it_lock, flags); spin_lock(&x->lock); /* If timer was deleted between interrupt and here, leave */ if (t != x->timer) goto out; t->it_overrun = 0; if (posix_timer_event(t, 0) != 0) { // printk(KERN_WARNING "mmtimer: cannot deliver signal.\n"); t->it_overrun++; } if(t->it.mmtimer.incr) { /* Periodic timer */ if (reschedule_periodic_timer(x)) { printk(KERN_WARNING "mmtimer: unable to reschedule\n"); x->timer = NULL; } } else { /* Ensure we don't false trigger in mmtimer_interrupt */ t->it.mmtimer.expires = 0; } t->it_overrun_last = t->it_overrun; out: spin_unlock(&x->lock); spin_unlock_irqrestore(&t->it_lock, flags); } static int sgi_timer_create(struct k_itimer *timer) { /* Insure that a newly created timer is off */ timer->it.mmtimer.clock = TIMER_OFF; return 0; } /* This does not really delete a timer. It just insures * that the timer is not active * * Assumption: it_lock is already held with irq's disabled */ static int sgi_timer_del(struct k_itimer *timr) { int i = timr->it.mmtimer.clock; cnodeid_t nodeid = timr->it.mmtimer.node; mmtimer_t *t = timers[nodeid] + i; unsigned long irqflags; if (i != TIMER_OFF) { spin_lock_irqsave(&t->lock, irqflags); mmtimer_disable_int(cnodeid_to_nasid(nodeid),i); t->timer = NULL; timr->it.mmtimer.clock = TIMER_OFF; timr->it.mmtimer.expires = 0; spin_unlock_irqrestore(&t->lock, irqflags); } return 0; } #define timespec_to_ns(x) ((x).tv_nsec + (x).tv_sec * NSEC_PER_SEC) #define ns_to_timespec(ts, nsec) (ts).tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &(ts).tv_nsec) /* Assumption: it_lock is already held with irq's disabled */ static void sgi_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { if (timr->it.mmtimer.clock == TIMER_OFF) { cur_setting->it_interval.tv_nsec = 0; cur_setting->it_interval.tv_sec = 0; cur_setting->it_value.tv_nsec = 0; cur_setting->it_value.tv_sec =0; return; } ns_to_timespec(cur_setting->it_interval, timr->it.mmtimer.incr * sgi_clock_period); ns_to_timespec(cur_setting->it_value, (timr->it.mmtimer.expires - rtc_time())* sgi_clock_period); return; } static int sgi_timer_set(struct k_itimer *timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting) { int i; unsigned long when, period, irqflags; int err = 0; cnodeid_t nodeid; mmtimer_t *base; if (old_setting) sgi_timer_get(timr, old_setting); sgi_timer_del(timr); when = timespec_to_ns(new_setting->it_value); period = timespec_to_ns(new_setting->it_interval); if (when == 0) /* Clear timer */ return 0; if (flags & TIMER_ABSTIME) { struct timespec n; unsigned long now; getnstimeofday(&n); now = timespec_to_ns(n); if (when > now) when -= now; else /* Fire the timer immediately */ when = 0; } /* * Convert to sgi clock period. Need to keep rtc_time() as near as possible * to getnstimeofday() in order to be as faithful as possible to the time * specified. */ when = (when + sgi_clock_period - 1) / sgi_clock_period + rtc_time(); period = (period + sgi_clock_period - 1) / sgi_clock_period; /* * We are allocating a local SHub comparator. If we would be moved to another * cpu then another SHub may be local to us. Prohibit that by switching off * preemption. */ preempt_disable(); nodeid = cpu_to_node(smp_processor_id()); retry: /* Don't use an allocated timer, or a deleted one that's pending */ for(i = 0; i< NUM_COMPARATORS; i++) { base = timers[nodeid] + i; if (!base->timer && !base->tasklet.state) { break; } } if (i == NUM_COMPARATORS) { preempt_enable(); return -EBUSY; } spin_lock_irqsave(&base->lock, irqflags); if (base->timer || base->tasklet.state != 0) { spin_unlock_irqrestore(&base->lock, irqflags); goto retry; } base->timer = timr; base->cpu = smp_processor_id(); timr->it.mmtimer.clock = i; timr->it.mmtimer.node = nodeid; timr->it.mmtimer.incr = period; timr->it.mmtimer.expires = when; if (period == 0) { if (!mmtimer_setup(i, when)) { mmtimer_disable_int(-1, i); posix_timer_event(timr, 0); timr->it.mmtimer.expires = 0; } } else { timr->it.mmtimer.expires -= period; if (reschedule_periodic_timer(base)) err = -EINVAL; } spin_unlock_irqrestore(&base->lock, irqflags); preempt_enable(); return err; } static struct k_clock sgi_clock = { .res = 0, .clock_set = sgi_clock_set, .clock_get = sgi_clock_get, .timer_create = sgi_timer_create, .nsleep = do_posix_clock_nonanosleep, .timer_set = sgi_timer_set, .timer_del = sgi_timer_del, .timer_get = sgi_timer_get }; /** * mmtimer_init - device initialization routine * * Does initial setup for the mmtimer device. */ static int __init mmtimer_init(void) { unsigned i; cnodeid_t node, maxn = -1; if (!ia64_platform_is("sn2")) return 0; /* * Sanity check the cycles/sec variable */ if (sn_rtc_cycles_per_second < 100000) { printk(KERN_ERR "%s: unable to determine clock frequency\n", MMTIMER_NAME); return -1; } mmtimer_femtoperiod = ((unsigned long)1E15 + sn_rtc_cycles_per_second / 2) / sn_rtc_cycles_per_second; if (request_irq(SGI_MMTIMER_VECTOR, mmtimer_interrupt, SA_PERCPU_IRQ, MMTIMER_NAME, NULL)) { printk(KERN_WARNING "%s: unable to allocate interrupt.", MMTIMER_NAME); return -1; } strcpy(mmtimer_miscdev.devfs_name, MMTIMER_NAME); if (misc_register(&mmtimer_miscdev)) { printk(KERN_ERR "%s: failed to register device\n", MMTIMER_NAME); return -1; } /* Get max numbered node, calculate slots needed */ for_each_online_node(node) { maxn = node; } maxn++; /* Allocate list of node ptrs to mmtimer_t's */ timers = kmalloc(sizeof(mmtimer_t *)*maxn, GFP_KERNEL); if (timers == NULL) { printk(KERN_ERR "%s: failed to allocate memory for device\n", MMTIMER_NAME); return -1; } /* Allocate mmtimer_t's for each online node */ for_each_online_node(node) { timers[node] = kmalloc_node(sizeof(mmtimer_t)*NUM_COMPARATORS, GFP_KERNEL, node); if (timers[node] == NULL) { printk(KERN_ERR "%s: failed to allocate memory for device\n", MMTIMER_NAME); return -1; } for (i=0; i< NUM_COMPARATORS; i++) { mmtimer_t * base = timers[node] + i; spin_lock_init(&base->lock); base->timer = NULL; base->cpu = 0; base->i = i; tasklet_init(&base->tasklet, mmtimer_tasklet, (unsigned long) (base)); } } sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second; register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock); printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION, sn_rtc_cycles_per_second/(unsigned long)1E6); return 0; } module_init(mmtimer_init);