diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 106988104bf44f3e1cb5c7e15e9ab40e8e8d7bc9..8ef42b4d7ef4b83fca9d1a039c106c31be8b68a4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1871,6 +1871,7 @@ extern u32 __read_mostly kvm_nr_uret_msrs; extern u64 __read_mostly host_efer; extern bool __read_mostly allow_smaller_maxphyaddr; extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; extern struct kvm_x86_ops kvm_x86_ops; #define KVM_X86_OP(func) \ diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 63dea8ecd7efc3c90e497a990c41d110eb33bb01..4ae2340122bd45f4b1c1518442d87fa784357ba9 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -311,6 +311,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); + svm->avic_physical_id_entry = new_entry; WRITE_ONCE(*entry, new_entry); svm->avic_physical_id_cache = entry; @@ -835,7 +836,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * will update the pCPU info when the vCPU awkened and/or scheduled in. * See also avic_vcpu_load(). */ - entry = READ_ONCE(*(svm->avic_physical_id_cache)); + entry = svm->avic_physical_id_entry; if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK, true, pi->ir_data); @@ -1060,14 +1061,26 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) */ spin_lock_irqsave(&svm->ir_list_lock, flags); - entry = READ_ONCE(*(svm->avic_physical_id_cache)); + entry = svm->avic_physical_id_entry; WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; + svm->avic_physical_id_entry = entry; + + /* + * If IPI virtualization is disabled, clear IsRunning when updating the + * actual Physical ID table, so that the CPU never sees IsRunning=1. + * Keep the APIC ID up-to-date in the entry to minimize the chances of + * things going sideways if hardware peeks at the ID. + */ + if (!enable_ipiv) + entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; + WRITE_ONCE(*(svm->avic_physical_id_cache), entry); + avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); spin_unlock_irqrestore(&svm->ir_list_lock, flags); @@ -1088,7 +1101,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) * can't be scheduled out and thus avic_vcpu_{put,load}() can't run * recursively. */ - entry = READ_ONCE(*(svm->avic_physical_id_cache)); + entry = svm->avic_physical_id_entry; /* Nothing to do if IsRunning == '0' due to vCPU blocking. */ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) @@ -1107,7 +1120,10 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) avic_update_iommu_vcpu_affinity(vcpu, -1, 0); entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; - WRITE_ONCE(*(svm->avic_physical_id_cache), entry); + svm->avic_physical_id_entry = entry; + + if (enable_ipiv) + WRITE_ONCE(*(svm->avic_physical_id_cache), entry); spin_unlock_irqrestore(&svm->ir_list_lock, flags); @@ -1225,6 +1241,16 @@ bool avic_hardware_setup(void) if (x2avic_enabled) pr_info("x2AVIC enabled\n"); + /* + * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and + * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235, + * which results in missed VM-Exits on the sender and thus missed wake + * events for blocking vCPUs due to the CPU failing to see a software + * update to clear IsRunning. + */ + if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) + enable_ipiv = false; + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); return true; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8cd8771cdae1bf19fa62c453e20843084a850659..0d5a94570dc9793e15c5757a707e2ebbd2d53a5e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -232,6 +232,7 @@ module_param(tsc_scaling, int, 0444); */ static bool avic; module_param(avic, bool, 0444); +module_param(enable_ipiv, bool, 0444); bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); @@ -5525,6 +5526,7 @@ static __init int svm_hardware_setup(void) enable_apicv = avic = avic && avic_hardware_setup(); if (!enable_apicv) { + enable_ipiv = false; svm_x86_ops.vcpu_blocking = NULL; svm_x86_ops.vcpu_unblocking = NULL; svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 091a6056dfca09fa86b7de5c0b9e14c01de61de4..a18d403459e0a3eaea09b6064ac756bc58eb9965 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -297,6 +297,14 @@ struct vcpu_svm { struct page *avic_backing_page; u64 *avic_physical_id_cache; + /* This is essentially a shadow of the vCPU's actual entry in the + * Physical ID table that is programmed into the VMCB, i.e. that is + * seen by the CPU. If IPI virtualization is disabled, IsRunning is + * only ever set in the shadow, i.e. is never propagated to the "real" + * table, so that hardware never sees IsRunning=1. + */ + u64 avic_physical_id_entry; + /* * Per-vcpu list of struct amd_svm_iommu_ir: * This is used mainly to store interrupt remapping information used diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index f287396720a92a437bdde7bb1841817daa5d6f3c..a0943ad5b5ee3992dff50519d71bb3446ebda434 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -15,7 +15,6 @@ extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; -extern bool __read_mostly enable_ipiv; extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5280e3dd4f51f6102888631f7f7829431a1cfbba..192cc7d18b63210b4ebcacaca36ed4be9cedebf4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -109,7 +109,6 @@ module_param(fasteoi, bool, S_IRUGO); module_param(enable_apicv, bool, S_IRUGO); -bool __read_mostly enable_ipiv = true; module_param(enable_ipiv, bool, 0444); /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 24f754c7e8f25a647fe214724b8e0d61593db233..3d1bb14420c1e90a8474dc790e4a0fb9e9a87c37 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -235,6 +235,9 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); bool __read_mostly enable_apicv = true; EXPORT_SYMBOL_GPL(enable_apicv); +bool __read_mostly enable_ipiv = true; +EXPORT_SYMBOL_GPL(enable_ipiv); + u64 __read_mostly host_xss; EXPORT_SYMBOL_GPL(host_xss);