Diffstat (limited to 'arch/x86/kvm/xen.c')
-rw-r--r-- | arch/x86/kvm/xen.c | 505
1 file changed, 358 insertions, 147 deletions
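Most of the mechanical churn in the patch below comes from the gfn_to_pfn cache ("GPC") API rework: a cache is now bound to its struct kvm and usage flags at init time, so the per-call kvm pointer and GPA arguments disappear and the check/refresh helpers get shorter names. A condensed, hedged sketch of the old versus new call shapes, taken from the hunks below (kernel context assumed, not a standalone program):

/* Before: every call re-supplies the kvm pointer, usage flags and GPA. */
kvm_gpc_init(gpc);
kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa, PAGE_SIZE);
if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
	kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
kvm_gpc_deactivate(kvm, gpc);

/* After: the cache remembers its kvm binding and usage from init time. */
kvm_gpc_init(gpc, kvm, NULL, KVM_HOST_USES_PFN);
kvm_gpc_activate(gpc, gpa, PAGE_SIZE);
if (!kvm_gpc_check(gpc, PAGE_SIZE))
	kvm_gpc_refresh(gpc, PAGE_SIZE);
kvm_gpc_deactivate(gpc);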
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index f3098c0e386a..d7af40240248 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -42,13 +42,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) int idx = srcu_read_lock(&kvm->srcu); if (gfn == GPA_INVALID) { - kvm_gpc_deactivate(kvm, gpc); + kvm_gpc_deactivate(gpc); goto out; } do { - ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa, - PAGE_SIZE); + ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE); if (ret) goto out; @@ -170,112 +169,45 @@ static void kvm_xen_init_timer(struct kvm_vcpu *vcpu) vcpu->arch.xen.timer.function = xen_timer_callback; } -static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) { struct kvm_vcpu_xen *vx = &v->arch.xen; - u64 now = get_kvmclock_ns(v->kvm); - u64 delta_ns = now - vx->runstate_entry_time; - u64 run_delay = current->sched_info.run_delay; - - if (unlikely(!vx->runstate_entry_time)) - vx->current_runstate = RUNSTATE_offline; - - /* - * Time waiting for the scheduler isn't "stolen" if the - * vCPU wasn't running anyway. - */ - if (vx->current_runstate == RUNSTATE_running) { - u64 steal_ns = run_delay - vx->last_steal; - - delta_ns -= steal_ns; - - vx->runstate_times[RUNSTATE_runnable] += steal_ns; - } - vx->last_steal = run_delay; - - vx->runstate_times[vx->current_runstate] += delta_ns; - vx->current_runstate = state; - vx->runstate_entry_time = now; -} - -void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) -{ - struct kvm_vcpu_xen *vx = &v->arch.xen; - struct gfn_to_pfn_cache *gpc = &vx->runstate_cache; - uint64_t *user_times; + struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache; + struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache; + size_t user_len, user_len1, user_len2; + struct vcpu_runstate_info rs; unsigned long flags; - size_t user_len; - int *user_state; - - kvm_xen_update_runstate(v, state); - - if (!vx->runstate_cache.active) - return; - - if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) - user_len = sizeof(struct vcpu_runstate_info); - else - user_len = sizeof(struct compat_vcpu_runstate_info); - - read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - user_len)) { - read_unlock_irqrestore(&gpc->lock, flags); - - /* When invoked from kvm_sched_out() we cannot sleep */ - if (state == RUNSTATE_runnable) - return; - - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len)) - return; - - read_lock_irqsave(&gpc->lock, flags); - } + size_t times_ofs; + uint8_t *update_bit = NULL; + uint64_t entry_time; + uint64_t *rs_times; + int *rs_state; /* * The only difference between 32-bit and 64-bit versions of the - * runstate struct us the alignment of uint64_t in 32-bit, which + * runstate struct is the alignment of uint64_t in 32-bit, which * means that the 64-bit version has an additional 4 bytes of - * padding after the first field 'state'. - * - * So we use 'int __user *user_state' to point to the state field, - * and 'uint64_t __user *user_times' for runstate_entry_time. So - * the actual array of time[] in each state starts at user_times[1]. + * padding after the first field 'state'. Let's be really really + * paranoid about that, and matching it with our internal data + * structures that we memcpy into it... 
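The layout paranoia spelled out in this comment is what the BUILD_BUG_ON()s in the next hunk pin down, and it can be reproduced in isolation: the 32-bit Xen ABI packs uint64_t on 4-byte boundaries, so the 64-bit struct picks up 4 bytes of padding after 'state' and grows by 4 bytes overall. A standalone userspace sketch follows; the struct definitions are reconstructed from the Xen public ABI as the patch describes it, and the packed/aligned attributes are an assumption used to model the 32-bit layout in a 64-bit build, not copied from kernel headers.

#include <assert.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct vcpu_runstate_info {            /* 64-bit guest view */
	int state;                     /* 4 bytes + 4 bytes of alignment padding */
	uint64_t state_entry_time;     /* naturally 8-byte aligned */
	uint64_t time[4];
};

struct compat_vcpu_runstate_info {     /* 32-bit guest view */
	int state;
	uint64_t state_entry_time;
	uint64_t time[4];
} __attribute__((packed, aligned(4))); /* 32-bit ABI: uint64_t aligned to 4 */

int main(void)
{
	printf("64-bit: size %#zx, time[] at offset %#zx\n",
	       sizeof(struct vcpu_runstate_info),
	       offsetof(struct vcpu_runstate_info, time));
	printf("32-bit: size %#zx, time[] at offset %#zx\n",
	       sizeof(struct compat_vcpu_runstate_info),
	       offsetof(struct compat_vcpu_runstate_info, time));

	/* The same relationships the patch asserts with BUILD_BUG_ON() */
	assert(sizeof(struct compat_vcpu_runstate_info) == 0x2c);
	assert(sizeof(struct vcpu_runstate_info) == 0x2c + 4);
	assert(offsetof(struct vcpu_runstate_info, time) ==
	       offsetof(struct compat_vcpu_runstate_info, time) + 4);
	return 0;
}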
*/ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); #ifdef CONFIG_X86_64 + /* + * The 64-bit structure has 4 bytes of padding before 'state_entry_time' + * so each subsequent field is shifted by 4, and it's 4 bytes longer. + */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != offsetof(struct compat_vcpu_runstate_info, time) + 4); + BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4); #endif - - user_state = gpc->khva; - - if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) - user_times = gpc->khva + offsetof(struct vcpu_runstate_info, - state_entry_time); - else - user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info, - state_entry_time); - /* - * First write the updated state_entry_time at the appropriate - * location determined by 'offset'. - */ - BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != - sizeof(user_times[0])); - BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != - sizeof(user_times[0])); - - user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE; - smp_wmb(); - - /* - * Next, write the new runstate. This is in the *same* place - * for 32-bit and 64-bit guests, asserted here for paranoia. + * The state field is in the same place at the start of both structs, + * and is the same size (int) as vx->current_runstate. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != offsetof(struct compat_vcpu_runstate_info, state)); @@ -284,34 +216,238 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); - *user_state = vx->current_runstate; + /* + * The state_entry_time field is 64 bits in both versions, and the + * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86 + * is little-endian means that it's in the last *byte* of the word. + * That detail is important later. + */ + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != + sizeof(uint64_t)); + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != + sizeof(uint64_t)); + BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80); /* - * Write the actual runstate times immediately after the - * runstate_entry_time. + * The time array is four 64-bit quantities in both versions, matching + * the vx->runstate_times and immediately following state_entry_time. 
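The BUILD_BUG_ON() on (XEN_RUNSTATE_UPDATE >> 56) captures the detail the later single-byte write relies on: x86 is little-endian, so bit 63 of the 64-bit state_entry_time lives in its last byte, and storing 0x80-style values into that one byte toggles the flag without touching the rest of the word. A small standalone demonstration, assuming a little-endian build; the update_bit pointer name is illustrative and XEN_RUNSTATE_UPDATE is defined locally to match the bit the patch asserts:

#include <assert.h>
#include <stdint.h>

#define XEN_RUNSTATE_UPDATE (1ULL << 63)

int main(void)
{
	uint64_t state_entry_time = 0x123456789abcdef0ULL;
	uint8_t *update_bit;

	/* Last byte of the little-endian uint64_t holds bit 63 */
	update_bit = (uint8_t *)(&state_entry_time + 1) - 1;

	assert((XEN_RUNSTATE_UPDATE >> 56) == 0x80);

	*update_bit = (state_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
	assert(state_entry_time & XEN_RUNSTATE_UPDATE);

	*update_bit = (state_entry_time & ~XEN_RUNSTATE_UPDATE) >> 56;
	assert(!(state_entry_time & XEN_RUNSTATE_UPDATE));

	return 0;
}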
*/ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != - offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); + offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != - offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); + offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof_field(struct compat_vcpu_runstate_info, time)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); - memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); - smp_wmb(); + if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { + user_len = sizeof(struct vcpu_runstate_info); + times_ofs = offsetof(struct vcpu_runstate_info, + state_entry_time); + } else { + user_len = sizeof(struct compat_vcpu_runstate_info); + times_ofs = offsetof(struct compat_vcpu_runstate_info, + state_entry_time); + } + + /* + * There are basically no alignment constraints. The guest can set it + * up so it crosses from one page to the next, and at arbitrary byte + * alignment (and the 32-bit ABI doesn't align the 64-bit integers + * anyway, even if the overall struct had been 64-bit aligned). + */ + if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) { + user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK); + user_len2 = user_len - user_len1; + } else { + user_len1 = user_len; + user_len2 = 0; + } + BUG_ON(user_len1 + user_len2 != user_len); + + retry: + /* + * Attempt to obtain the GPC lock on *both* (if there are two) + * gfn_to_pfn caches that cover the region. + */ + read_lock_irqsave(&gpc1->lock, flags); + while (!kvm_gpc_check(gpc1, user_len1)) { + read_unlock_irqrestore(&gpc1->lock, flags); + + /* When invoked from kvm_sched_out() we cannot sleep */ + if (atomic) + return; + + if (kvm_gpc_refresh(gpc1, user_len1)) + return; + + read_lock_irqsave(&gpc1->lock, flags); + } + + if (likely(!user_len2)) { + /* + * Set up three pointers directly to the runstate_info + * struct in the guest (via the GPC). + * + * • @rs_state → state field + * • @rs_times → state_entry_time field. + * • @update_bit → last byte of state_entry_time, which + * contains the XEN_RUNSTATE_UPDATE bit. + */ + rs_state = gpc1->khva; + rs_times = gpc1->khva + times_ofs; + if (v->kvm->arch.xen.runstate_update_flag) + update_bit = ((void *)(&rs_times[1])) - 1; + } else { + /* + * The guest's runstate_info is split across two pages and we + * need to hold and validate both GPCs simultaneously. We can + * declare a lock ordering GPC1 > GPC2 because nothing else + * takes them more than one at a time. + */ + read_lock(&gpc2->lock); + + if (!kvm_gpc_check(gpc2, user_len2)) { + read_unlock(&gpc2->lock); + read_unlock_irqrestore(&gpc1->lock, flags); + + /* When invoked from kvm_sched_out() we cannot sleep */ + if (atomic) + return; + + /* + * Use kvm_gpc_activate() here because if the runstate + * area was configured in 32-bit mode and only extends + * to the second page now because the guest changed to + * 64-bit mode, the second GPC won't have been set up. + */ + if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1, + user_len2)) + return; + + /* + * We dropped the lock on GPC1 so we have to go all the + * way back and revalidate that too. 
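The split sizing done earlier in this function, keyed on (gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE, can be checked in isolation: whatever fits in the first page goes to GPC1, the remainder (possibly zero) to GPC2. A minimal userspace sketch, assuming 4 KiB pages; the split_runstate_area() helper and its parameter names are illustrative, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static void split_runstate_area(uint64_t gpa, size_t user_len,
				size_t *len1, size_t *len2)
{
	size_t offset_in_page = gpa & ~PAGE_MASK;

	if (offset_in_page + user_len >= PAGE_SIZE) {
		/* The area reaches or crosses the end of the first page */
		*len1 = PAGE_SIZE - offset_in_page;
		*len2 = user_len - *len1;
	} else {
		*len1 = user_len;
		*len2 = 0;
	}
}

int main(void)
{
	size_t len1, len2;

	/* 64-bit runstate (0x30 bytes) placed 8 bytes before a page end */
	split_runstate_area(0x10ff8, 0x30, &len1, &len2);
	printf("first page: %zu bytes, second page: %zu bytes\n", len1, len2);
	return 0;
}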
+ */ + goto retry; + } + + /* + * In this case, the runstate_info struct will be assembled on + * the kernel stack (compat or not as appropriate) and will + * be copied to GPC1/GPC2 with a dual memcpy. Set up the three + * rs pointers accordingly. + */ + rs_times = &rs.state_entry_time; + + /* + * The rs_state pointer points to the start of what we'll + * copy to the guest, which in the case of a compat guest + * is the 32-bit field that the compiler thinks is padding. + */ + rs_state = ((void *)rs_times) - times_ofs; + + /* + * The update_bit is still directly in the guest memory, + * via one GPC or the other. + */ + if (v->kvm->arch.xen.runstate_update_flag) { + if (user_len1 >= times_ofs + sizeof(uint64_t)) + update_bit = gpc1->khva + times_ofs + + sizeof(uint64_t) - 1; + else + update_bit = gpc2->khva + times_ofs + + sizeof(uint64_t) - 1 - user_len1; + } + +#ifdef CONFIG_X86_64 + /* + * Don't leak kernel memory through the padding in the 64-bit + * version of the struct. + */ + memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time)); +#endif + } + + /* + * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the + * state_entry_time field, directly in the guest. We need to set + * that (and write-barrier) before writing to the rest of the + * structure, and clear it last. Just as Xen does, we address the + * single *byte* in which it resides because it might be in a + * different cache line to the rest of the 64-bit word, due to + * the (lack of) alignment constraints. + */ + entry_time = vx->runstate_entry_time; + if (update_bit) { + entry_time |= XEN_RUNSTATE_UPDATE; + *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; + smp_wmb(); + } /* - * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's - * runstate_entry_time field. + * Now assemble the actual structure, either on our kernel stack + * or directly in the guest according to how the rs_state and + * rs_times pointers were set up above. */ - user_times[0] &= ~XEN_RUNSTATE_UPDATE; + *rs_state = vx->current_runstate; + rs_times[0] = entry_time; + memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); + + /* For the split case, we have to then copy it to the guest. */ + if (user_len2) { + memcpy(gpc1->khva, rs_state, user_len1); + memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2); + } smp_wmb(); - read_unlock_irqrestore(&gpc->lock, flags); + /* Finally, clear the XEN_RUNSTATE_UPDATE bit. */ + if (update_bit) { + entry_time &= ~XEN_RUNSTATE_UPDATE; + *update_bit = entry_time >> 56; + smp_wmb(); + } - mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); + if (user_len2) + read_unlock(&gpc2->lock); + + read_unlock_irqrestore(&gpc1->lock, flags); + + mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT); + if (user_len2) + mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT); +} + +void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + u64 now = get_kvmclock_ns(v->kvm); + u64 delta_ns = now - vx->runstate_entry_time; + u64 run_delay = current->sched_info.run_delay; + + if (unlikely(!vx->runstate_entry_time)) + vx->current_runstate = RUNSTATE_offline; + + /* + * Time waiting for the scheduler isn't "stolen" if the + * vCPU wasn't running anyway. 
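The writer-side protocol above (set the flag byte, write barrier, update the struct, barrier, clear the flag byte, barrier) only helps a guest that treats XEN_RUNSTATE_UPDATE as a "data may be torn" marker. A hedged sketch of how a guest-side reader might consume the area when the VMM has enabled the update flag: the struct definition and read_runstate() helper are illustrative, and the C11-style acquire fences are an assumption standing in for the guest's own barriers, not taken from kernel code.

#include <stdint.h>

#define XEN_RUNSTATE_UPDATE (1ULL << 63)

struct vcpu_runstate_info {
	int state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

static void read_runstate(const volatile struct vcpu_runstate_info *rs,
			  struct vcpu_runstate_info *out)
{
	uint64_t entry_time;

	do {
		entry_time = rs->state_entry_time;
		__atomic_thread_fence(__ATOMIC_ACQUIRE); /* pairs with the writer's wmb */

		out->state = rs->state;
		for (int i = 0; i < 4; i++)
			out->time[i] = rs->time[i];

		__atomic_thread_fence(__ATOMIC_ACQUIRE);
		/* Retry if an update was in flight, or started while we read */
	} while ((entry_time & XEN_RUNSTATE_UPDATE) ||
		 entry_time != rs->state_entry_time);

	out->state_entry_time = entry_time;
}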
+ */ + if (vx->current_runstate == RUNSTATE_running) { + u64 steal_ns = run_delay - vx->last_steal; + + delta_ns -= steal_ns; + + vx->runstate_times[RUNSTATE_runnable] += steal_ns; + } + vx->last_steal = run_delay; + + vx->runstate_times[vx->current_runstate] += delta_ns; + vx->current_runstate = state; + vx->runstate_entry_time = now; + + if (vx->runstate_cache.active) + kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); } static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) @@ -352,12 +488,10 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) * little more honest about it. */ read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) + if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) return; read_lock_irqsave(&gpc->lock, flags); @@ -417,8 +551,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); /* @@ -432,8 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (in_atomic() || !task_is_running(current)) return 1; - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) { /* * If this failed, userspace has screwed up the * vcpu_info mapping. No interrupts for you. @@ -493,6 +625,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = 0; break; + case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + mutex_lock(&kvm->lock); + kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; + mutex_unlock(&kvm->lock); + r = 0; + break; + default: break; } @@ -530,6 +673,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = 0; break; + case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag; + r = 0; + break; + default: break; } @@ -554,15 +706,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) offsetof(struct compat_vcpu_info, time)); if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); r = 0; break; } - r = kvm_gpc_activate(vcpu->kvm, - &vcpu->arch.xen.vcpu_info_cache, NULL, - KVM_HOST_USES_PFN, data->u.gpa, - sizeof(struct vcpu_info)); + r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, + data->u.gpa, sizeof(struct vcpu_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -570,37 +720,65 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); r = 0; break; } - r = kvm_gpc_activate(vcpu->kvm, - &vcpu->arch.xen.vcpu_time_info_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, + r = 
kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache, + data->u.gpa, sizeof(struct pvclock_vcpu_time_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); break; - case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: { + size_t sz, sz1, sz2; + if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.runstate_cache); r = 0; + deactivate_out: + kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); break; } - r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, - sizeof(struct vcpu_runstate_info)); - break; + /* + * If the guest switches to 64-bit mode after setting the runstate + * address, that's actually OK. kvm_xen_update_runstate_guest() + * will cope. + */ + if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) + sz = sizeof(struct vcpu_runstate_info); + else + sz = sizeof(struct compat_vcpu_runstate_info); + + /* How much fits in the (first) page? */ + sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK); + r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache, + data->u.gpa, sz1); + if (r) + goto deactivate_out; + + /* Either map the second page, or deactivate the second GPC */ + if (sz1 >= sz) { + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); + } else { + sz2 = sz - sz1; + BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK); + r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache, + data->u.gpa + sz1, sz2); + if (r) + goto deactivate_out; + } + kvm_xen_update_runstate_guest(vcpu, false); + break; + } case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: if (!sched_info_on()) { r = -EOPNOTSUPP; @@ -693,6 +871,8 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) if (data->u.runstate.state <= RUNSTATE_offline) kvm_xen_update_runstate(vcpu, data->u.runstate.state); + else if (vcpu->arch.xen.runstate_cache.active) + kvm_xen_update_runstate_guest(vcpu, false); r = 0; break; @@ -972,9 +1152,9 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, bool ret = true; int idx, i; - read_lock_irqsave(&gpc->lock, flags); idx = srcu_read_lock(&kvm->srcu); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + read_lock_irqsave(&gpc->lock, flags); + if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; ret = false; @@ -994,8 +1174,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, } out_rcu: - srcu_read_unlock(&kvm->srcu, idx); read_unlock_irqrestore(&gpc->lock, flags); + srcu_read_unlock(&kvm->srcu, idx); return ret; } @@ -1008,20 +1188,45 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, evtchn_port_t port, *ports; gpa_t gpa; - if (!longmode || !lapic_in_kernel(vcpu) || + if (!lapic_in_kernel(vcpu) || !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) return false; idx = srcu_read_lock(&vcpu->kvm->srcu); gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); srcu_read_unlock(&vcpu->kvm->srcu, idx); - - if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, - sizeof(sched_poll))) { + if (!gpa) { *r = -EFAULT; return true; } + if (IS_ENABLED(CONFIG_64BIT) && !longmode) { + struct compat_sched_poll sp32; + + /* Sanity check that the compat struct definition is correct */ + BUILD_BUG_ON(sizeof(sp32) != 16); + + if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) { + *r = -EFAULT; + return true; + } + + /* + * This is a 32-bit pointer to an array of evtchn_port_t 
which + * are uint32_t, so once it's converted no further compat + * handling is needed. + */ + sched_poll.ports = (void *)(unsigned long)(sp32.ports); + sched_poll.nr_ports = sp32.nr_ports; + sched_poll.timeout = sp32.timeout; + } else { + if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, + sizeof(sched_poll))) { + *r = -EFAULT; + return true; + } + } + if (unlikely(sched_poll.nr_ports > 1)) { /* Xen (unofficially) limits number of pollers to 128 */ if (sched_poll.nr_ports > 128) { @@ -1256,7 +1461,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) } #endif cpl = static_call(kvm_x86_get_cpl)(vcpu); - trace_kvm_xen_hypercall(input, params[0], params[1], params[2], + trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2], params[3], params[4], params[5]); /* @@ -1371,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { @@ -1405,7 +1610,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) gpc = &vcpu->arch.xen.vcpu_info_cache; read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) { + if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { /* * Could not access the vcpu_info. Set the bit in-kernel * and prod the vCPU to deliver it for itself. @@ -1503,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) break; idx = srcu_read_lock(&kvm->srcu); - rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE); + rc = kvm_gpc_refresh(gpc, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); @@ -1833,9 +2038,14 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); - kvm_gpc_init(&vcpu->arch.xen.runstate_cache); - kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache); - kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); } void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) @@ -1843,9 +2053,10 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) if (kvm_xen_timer_enabled(vcpu)) kvm_xen_stop_timer(vcpu); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); del_timer_sync(&vcpu->arch.xen.poll_timer); } @@ -1853,7 +2064,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) void kvm_xen_init_vm(struct kvm *kvm) { idr_init(&kvm->arch.xen.evtchn_ports); - kvm_gpc_init(&kvm->arch.xen.shinfo_cache); + kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); } void kvm_xen_destroy_vm(struct kvm *kvm) @@ -1861,7 +2072,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm) struct evtchnfd *evtchnfd; int i; - kvm_gpc_deactivate(kvm, 
&kvm->arch.xen.shinfo_cache); + kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { if (!evtchnfd->deliver.port.port)
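The XEN_RUNSTATE_UPDATE behaviour is opt-in from userspace via the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute added above; the handler rejects it with -EOPNOTSUPP when the kernel has no scheduler statistics (sched_info_on()). A hedged userspace sketch of enabling it, assuming uapi headers new enough to carry the attribute and its runstate_update_flag union member; vm_fd is an already-created KVM VM descriptor and the helper name is illustrative:

#include <linux/kvm.h>
#include <string.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int xen_enable_runstate_update_flag(int vm_fd)
{
	struct kvm_xen_hvm_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG;
	attr.u.runstate_update_flag = 1;

	/* Fails with EOPNOTSUPP if the kernel lacks schedstats (sched_info_on()) */
	if (ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr) < 0) {
		perror("KVM_XEN_HVM_SET_ATTR(RUNSTATE_UPDATE_FLAG)");
		return -1;
	}
	return 0;
}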