Commit e37a07e0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Radim Krčmář:
 "Second batch of KVM updates for v4.13

  Common:
   - add uevents for VM creation/destruction
   - annotate and properly access RCU-protected objects

  s390:
   - rename IOCTL added in the first v4.13 merge

  x86:
   - emulate VMLOAD VMSAVE feature in SVM
   - support paravirtual asynchronous page fault while nested
   - add Hyper-V userspace interfaces for better migration
   - improve master clock corner cases
   - extend internal error reporting after EPT misconfig
   - correct single-stepping of emulated instructions in SVM
   - handle MCE during VM entry
   - fix nVMX VM entry checks and nVMX VMCS shadowing"

* tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  kvm: x86: hyperv: make VP_INDEX managed by userspace
  KVM: async_pf: Let guest support delivery of async_pf from guest mode
  KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
  KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
  KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list
  kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
  KVM: x86: make backwards_tsc_observed a per-VM variable
  KVM: trigger uevents when creating or destroying a VM
  KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
  KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition
  KVM: SVM: Rename lbr_ctl field in the vmcb control area
  KVM: SVM: Prepare for new bit definition in lbr_ctl
  KVM: SVM: handle singlestep exception when skipping emulated instructions
  KVM: x86: take slots_lock in kvm_free_pit
  KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition
  kvm: vmx: Properly handle machine check during VM-entry
  KVM: x86: update master clock before computing kvmclock_offset
  kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields
  kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls
  kvm: nVMX: Validate the I/O bitmaps on nested VM-entry
  ...
parents a80099a1 d3457c87
...@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible ...@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is (counting from the right) is set, then a virtual SMT mode of 2^N is
available. available.
8.11 KVM_CAP_HYPERV_SYNIC2
Architectures: x86
This capability enables a newer version of Hyper-V Synthetic interrupt
controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
doesn't clear SynIC message and event flags pages when they are enabled by
writing to the respective MSRs.
8.12 KVM_CAP_HYPERV_VP_INDEX
Architectures: x86
This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
value is used to denote the target vcpu for a SynIC interrupt. For
compatibilty, KVM initializes this msr to KVM's internal vcpu index. When this
capability is absent, userspace can still query this msr's value.
...@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12 ...@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
MSR_KVM_ASYNC_PF_EN: 0x4b564d02 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
data: Bits 63-6 hold 64-byte aligned physical address of a data: Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be 64 byte memory area which must be in guest RAM and must be
zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu 0 when when asynchronous page faults are enabled on the vcpu 0 when
disabled. Bit 1 is 1 if asynchronous page faults can be injected disabled. Bit 1 is 1 if asynchronous page faults can be injected
when vcpu is in cpl == 0. when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
are delivered to L1 as #PF vmexits.
First 4 byte of 64 byte memory location will be written to by First 4 byte of 64 byte memory location will be written to by
the hypervisor at the time of asynchronous page fault (APF) the hypervisor at the time of asynchronous page fault (APF)
......
...@@ -286,6 +286,7 @@ ...@@ -286,6 +286,7 @@
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
......
...@@ -23,6 +23,7 @@ struct x86_exception { ...@@ -23,6 +23,7 @@ struct x86_exception {
u16 error_code; u16 error_code;
bool nested_page_fault; bool nested_page_fault;
u64 address; /* cr2 or nested page fault gpa */ u64 address; /* cr2 or nested page fault gpa */
u8 async_page_fault;
}; };
/* /*
......
...@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic { ...@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
DECLARE_BITMAP(auto_eoi_bitmap, 256); DECLARE_BITMAP(auto_eoi_bitmap, 256);
DECLARE_BITMAP(vec_bitmap, 256); DECLARE_BITMAP(vec_bitmap, 256);
bool active; bool active;
bool dont_zero_synic_pages;
}; };
/* Hyper-V per vcpu emulation context */ /* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv { struct kvm_vcpu_hv {
u32 vp_index;
u64 hv_vapic; u64 hv_vapic;
s64 runtime_offset; s64 runtime_offset;
struct kvm_vcpu_hv_synic synic; struct kvm_vcpu_hv_synic synic;
...@@ -549,6 +551,7 @@ struct kvm_vcpu_arch { ...@@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
bool reinject; bool reinject;
u8 nr; u8 nr;
u32 error_code; u32 error_code;
u8 nested_apf;
} exception; } exception;
struct kvm_queued_interrupt { struct kvm_queued_interrupt {
...@@ -649,6 +652,9 @@ struct kvm_vcpu_arch { ...@@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
u64 msr_val; u64 msr_val;
u32 id; u32 id;
bool send_user_only; bool send_user_only;
u32 host_apf_reason;
unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
} apf; } apf;
/* OSVW MSRs (AMD only) */ /* OSVW MSRs (AMD only) */
...@@ -803,6 +809,7 @@ struct kvm_arch { ...@@ -803,6 +809,7 @@ struct kvm_arch {
int audit_point; int audit_point;
#endif #endif
bool backwards_tsc_observed;
bool boot_vcpu_runs_old_kvmclock; bool boot_vcpu_runs_old_kvmclock;
u32 bsp_vcpu_id; u32 bsp_vcpu_id;
...@@ -952,9 +959,7 @@ struct kvm_x86_ops { ...@@ -952,9 +959,7 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr); unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu); void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu); void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, void (*queue_exception)(struct kvm_vcpu *vcpu);
bool has_error_code, u32 error_code,
bool reinject);
void (*cancel_injection)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu);
......
...@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { ...@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 event_inj; u32 event_inj;
u32 event_inj_err; u32 event_inj_err;
u64 nested_cr3; u64 nested_cr3;
u64 lbr_ctl; u64 virt_ext;
u32 clean; u32 clean;
u32 reserved_5; u32 reserved_5;
u64 next_rip; u64 next_rip;
...@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { ...@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
#define SVM_INTERRUPT_SHADOW_MASK 1 #define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2 #define SVM_IOIO_STR_SHIFT 2
......
...@@ -67,6 +67,7 @@ struct kvm_clock_pairing { ...@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
/* Operations for KVM_HC_MMU_OP */ /* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1 #define KVM_MMU_OP_WRITE_PTE 1
......
...@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void) ...@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
#ifdef CONFIG_PREEMPT #ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS; pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif #endif
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); pa |= KVM_ASYNC_PF_ENABLED;
/* Async page fault support for L1 hypervisor is optional */
if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1); __this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n", printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id()); smp_processor_id());
......
...@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, ...@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
return 0; return 0;
} }
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
struct kvm_vcpu *vcpu = NULL;
int i;
if (vpidx < KVM_MAX_VCPUS)
vcpu = kvm_get_vcpu(kvm, vpidx);
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
return NULL;
}
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{ {
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
struct kvm_vcpu_hv_synic *synic; struct kvm_vcpu_hv_synic *synic;
if (vcpu_id >= atomic_read(&kvm->online_vcpus)) vcpu = get_vcpu_by_vpidx(kvm, vpidx);
return NULL;
vcpu = kvm_get_vcpu(kvm, vcpu_id);
if (!vcpu) if (!vcpu)
return NULL; return NULL;
synic = vcpu_to_synic(vcpu); synic = vcpu_to_synic(vcpu);
...@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, ...@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
synic->version = data; synic->version = data;
break; break;
case HV_X64_MSR_SIEFP: case HV_X64_MSR_SIEFP:
if (data & HV_SYNIC_SIEFP_ENABLE) if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
!synic->dont_zero_synic_pages)
if (kvm_clear_guest(vcpu->kvm, if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) { data & PAGE_MASK, PAGE_SIZE)) {
ret = 1; ret = 1;
...@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, ...@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
synic_exit(synic, msr); synic_exit(synic, msr);
break; break;
case HV_X64_MSR_SIMP: case HV_X64_MSR_SIMP:
if (data & HV_SYNIC_SIMP_ENABLE) if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
!synic->dont_zero_synic_pages)
if (kvm_clear_guest(vcpu->kvm, if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) { data & PAGE_MASK, PAGE_SIZE)) {
ret = 1; ret = 1;
...@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) ...@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
return ret; return ret;
} }
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{ {
struct kvm_vcpu_hv_synic *synic; struct kvm_vcpu_hv_synic *synic;
synic = synic_get(kvm, vcpu_id); synic = synic_get(kvm, vpidx);
if (!synic) if (!synic)
return -EINVAL; return -EINVAL;
...@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) ...@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
kvm_hv_notify_acked_sint(vcpu, i); kvm_hv_notify_acked_sint(vcpu, i);
} }
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{ {
struct kvm_vcpu_hv_synic *synic; struct kvm_vcpu_hv_synic *synic;
synic = synic_get(kvm, vcpu_id); synic = synic_get(kvm, vpidx);
if (!synic) if (!synic)
return -EINVAL; return -EINVAL;
...@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) ...@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
stimer_init(&hv_vcpu->stimer[i], i); stimer_init(&hv_vcpu->stimer[i], i);
} }
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
}
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{ {
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
/* /*
* Hyper-V SynIC auto EOI SINT's are * Hyper-V SynIC auto EOI SINT's are
* not compatible with APICV, so deactivate APICV * not compatible with APICV, so deactivate APICV
*/ */
kvm_vcpu_deactivate_apicv(vcpu); kvm_vcpu_deactivate_apicv(vcpu);
vcpu_to_synic(vcpu)->active = true; synic->active = true;
synic->dont_zero_synic_pages = dont_zero_synic_pages;
return 0; return 0;
} }
...@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) ...@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
switch (msr) { switch (msr) {
case HV_X64_MSR_VP_INDEX:
if (!host)
return 1;
hv->vp_index = (u32)data;
break;
case HV_X64_MSR_APIC_ASSIST_PAGE: { case HV_X64_MSR_APIC_ASSIST_PAGE: {
u64 gfn; u64 gfn;
unsigned long addr; unsigned long addr;
...@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
switch (msr) { switch (msr) {
case HV_X64_MSR_VP_INDEX: { case HV_X64_MSR_VP_INDEX:
int r; data = hv->vp_index;
struct kvm_vcpu *v;
kvm_for_each_vcpu(r, v, vcpu->kvm) {
if (v == vcpu) {
data = r;
break;
}
}
break; break;
}
case HV_X64_MSR_EOI: case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
case HV_X64_MSR_ICR: case HV_X64_MSR_ICR:
......
...@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu); ...@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
void kvm_hv_irq_routing_update(struct kvm *kvm); void kvm_hv_irq_routing_update(struct kvm *kvm);
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
......
...@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm) ...@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
struct kvm_pit *pit = kvm->arch.vpit; struct kvm_pit *pit = kvm->arch.vpit;
if (pit) { if (pit) {
mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev); kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
mutex_unlock(&kvm->slots_lock);
kvm_pit_set_reinject(pit, false); kvm_pit_set_reinject(pit, false);
hrtimer_cancel(&pit->pit_state.timer); hrtimer_cancel(&pit->pit_state.timer);
kthread_destroy_worker(pit->worker); kthread_destroy_worker(pit->worker);
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include <asm/io.h> #include <asm/io.h>
#include <asm/vmx.h> #include <asm/vmx.h>
#include <asm/kvm_page_track.h> #include <asm/kvm_page_track.h>
#include "trace.h"
/* /*
* When setting this variable to true it enables Two-Dimensional-Paging * When setting this variable to true it enables Two-Dimensional-Paging
...@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) ...@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
kvm_event_needs_reinjection(vcpu))) kvm_event_needs_reinjection(vcpu)))
return false; return false;
if (is_guest_mode(vcpu)) if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false; return false;
return kvm_x86_ops->interrupt_allowed(vcpu); return kvm_x86_ops->interrupt_allowed(vcpu);
...@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, ...@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false; return false;
} }
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len,
bool need_unprotect)
{
int r = 1;
switch (vcpu->arch.apf.host_apf_reason) {
default:
trace_kvm_page_fault(fault_address, error_code);
if (need_unprotect && kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, fault_address);
r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
insn_len);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
vcpu->arch.apf.host_apf_reason = 0;
local_irq_disable();
kvm_async_pf_task_wait(fault_address);
local_irq_enable();
break;
case KVM_PV_REASON_PAGE_READY:
vcpu->arch.apf.host_apf_reason = 0;
local_irq_disable();
kvm_async_pf_task_wake(fault_address);
local_irq_enable();
break;
}
return r;
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
static bool static bool
check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
{ {
......
...@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); ...@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty); bool accessed_dirty);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len,
bool need_unprotect);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{ {
......
This diff is collapsed.
...@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) ...@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
* KVM wants to inject page-faults which it got to the guest. This function * KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2. * checks whether in a nested guest, we need to inject them to L1 or L2.
*/ */
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
{ {
struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned int nr = vcpu->arch.exception.nr;
if (!(vmcs12->exception_bitmap & (1u << nr))) if (!((vmcs12->exception_bitmap & (1u << nr)) ||
(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
return 0; return 0;
if (vcpu->arch.exception.nested_apf) {
vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
vcpu->arch.apf.nested_apf_token);
return 1;
}
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
vmcs_read32(VM_EXIT_INTR_INFO), vmcs_read32(VM_EXIT_INTR_INFO),
vmcs_readl(EXIT_QUALIFICATION)); vmcs_readl(EXIT_QUALIFICATION));
return 1; return 1;
} }
static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, static void vmx_queue_exception(struct kvm_vcpu *vcpu)
bool has_error_code, u32 error_code,
bool reinject)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
bool reinject = vcpu->arch.exception.reinject;
u32 error_code = vcpu->arch.exception.error_code;
u32 intr_info = nr | INTR_INFO_VALID_MASK; u32 intr_info = nr | INTR_INFO_VALID_MASK;
if (!reinject && is_guest_mode(vcpu) && if (!reinject && is_guest_mode(vcpu) &&
nested_vmx_check_exception(vcpu, nr)) nested_vmx_check_exception(vcpu))
return; return;
if (has_error_code) { if (has_error_code) {
...@@ -3764,6 +3777,25 @@ static void free_kvm_area(void) ...@@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
} }
} }
enum vmcs_field_type {
VMCS_FIELD_TYPE_U16 = 0,
VMCS_FIELD_TYPE_U64 = 1,
VMCS_FIELD_TYPE_U32 = 2,
VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
};
static inline int vmcs_field_type(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
return VMCS_FIELD_TYPE_U32;
return (field >> 13) & 0x3 ;
}
static inline int vmcs_field_readonly(unsigned long field)
{
return (((field >> 10) & 0x3) == 1);
}
static void init_vmcs_shadow_fields(void) static void init_vmcs_shadow_fields(void)
{ {
int i, j; int i, j;
...@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void) ...@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
/* shadowed fields guest access without vmexit */ /* shadowed fields guest access without vmexit */
for (i = 0; i < max_shadow_read_write_fields; i++) { for (i = 0; i < max_shadow_read_write_fields; i++) {
clear_bit(shadow_read_write_fields[i], unsigned long field = shadow_read_write_fields[i];
vmx_vmwrite_bitmap);
clear_bit(shadow_read_write_fields[i], clear_bit(field, vmx_vmwrite_bitmap);
vmx_vmread_bitmap); clear_bit(field, vmx_vmread_bitmap);
if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
clear_bit(field + 1, vmx_vmwrite_bitmap);
clear_bit(field + 1, vmx_vmread_bitmap);
}
}
for (i = 0; i < max_shadow_read_only_fields; i++) {
unsigned long field = shadow_read_only_fields[i];
clear_bit(field, vmx_vmread_bitmap);
if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
clear_bit(field + 1, vmx_vmread_bitmap);
} }
for (i = 0; i < max_shadow_read_only_fields; i++)
clear_bit(shadow_read_only_fields[i],
vmx_vmread_bitmap);