From e898e5c408dc6070aa815a605f7d1aa7d0b93b95 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:19 -0700 Subject: [PATCH 1/4] KVM: VMX: Move enable_ipiv knob to common x86 commit bafddc70001d1834b2f2e490e108bbb8812b4bed upstream. Move enable_ipiv to common x86 so that it can be reused by SVM to control IPI virtualization when AVIC is enabled. SVM doesn't actually provide a way to truly disable IPI virtualization, but KVM can get close enough by skipping the necessary table programming. Hygon-SIG: upstream commit bafddc70001d ("KVM: VMX: Move enable_ipiv knob to common x86") Link: https://lore.kernel.org/r/20250611224604.313496-18-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Tina Zhang --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/capabilities.h | 1 - arch/x86/kvm/vmx/vmx.c | 1 - arch/x86/kvm/x86.c | 3 +++ 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 106988104bf4..8ef42b4d7ef4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1871,6 +1871,7 @@ extern u32 __read_mostly kvm_nr_uret_msrs; extern u64 __read_mostly host_efer; extern bool __read_mostly allow_smaller_maxphyaddr; extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; extern struct kvm_x86_ops kvm_x86_ops; #define KVM_X86_OP(func) \ diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index f287396720a9..a0943ad5b5ee 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -15,7 +15,6 @@ extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; -extern bool __read_mostly enable_ipiv; extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 
5280e3dd4f51..192cc7d18b63 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -109,7 +109,6 @@ module_param(fasteoi, bool, S_IRUGO); module_param(enable_apicv, bool, S_IRUGO); -bool __read_mostly enable_ipiv = true; module_param(enable_ipiv, bool, 0444); /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 24f754c7e8f2..3d1bb14420c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -235,6 +235,9 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); bool __read_mostly enable_apicv = true; EXPORT_SYMBOL_GPL(enable_apicv); +bool __read_mostly enable_ipiv = true; +EXPORT_SYMBOL_GPL(enable_ipiv); + u64 __read_mostly host_xss; EXPORT_SYMBOL_GPL(host_xss); -- Gitee From 346257d6bafb7f3e4b642e434623efb5bd3d73ef Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 11 Jun 2025 15:45:20 -0700 Subject: [PATCH 2/4] KVM: SVM: Add enable_ipiv param, never set IsRunning if disabled commit d921665e01ba86212bdace238bdff123bceffd46 upstream. Let userspace "disable" IPI virtualization for AVIC via the enable_ipiv module param, by never setting IsRunning. SVM doesn't provide a way to disable IPI virtualization in hardware, but by ensuring CPUs never see IsRunning=1, every IPI in the guest (except for self-IPIs) will generate a VM-Exit. To avoid setting the real IsRunning bit, while still allowing KVM to use each vCPU's entry to update GA log entries, simply maintain a shadow of the entry, without propagating IsRunning updates to the real table when IPI virtualization is disabled. Providing a way to effectively disable IPI virtualization will allow KVM to safely enable AVIC on hardware that is susceptible to erratum #1235, which causes hardware to sometimes fail to detect that the IsRunning bit has been cleared by software. Note, the table _must_ be fully populated, as broadcast IPIs skip invalid entries, i.e. won't generate VM-Exit if every entry is invalid, and so simply pointing the VMCB at a common dummy table won't work. 
Alternatively, KVM could allocate a shadow of the entire table, but that'd be a waste of 4KiB since the per-vCPU entry doesn't actually consume an additional 8 bytes of memory (vCPU structures are large enough that they are backed by order-N pages). Hygon-SIG: upstream commit d921665e01ba ("KVM: SVM: Add enable_ipiv param, never set IsRunning if disabled") Signed-off-by: Maxim Levitsky [sean: keep "entry" variables, reuse enable_ipiv, split from erratum] Link: https://lore.kernel.org/r/20250611224604.313496-19-seanjc@google.com Signed-off-by: Sean Christopherson [tina: adapt physical ID table accesses to use avic_physical_id_cache] Signed-off-by: Tina Zhang --- arch/x86/kvm/svm/avic.c | 24 ++++++++++++++++++++---- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/svm/svm.h | 8 ++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 63dea8ecd7ef..0cdea3f2e7b9 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -311,6 +311,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); + svm->avic_physical_id_entry = new_entry; WRITE_ONCE(*entry, new_entry); svm->avic_physical_id_cache = entry; @@ -835,7 +836,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * will update the pCPU info when the vCPU awkened and/or scheduled in. * See also avic_vcpu_load(). 
*/ - entry = READ_ONCE(*(svm->avic_physical_id_cache)); + entry = svm->avic_physical_id_entry; if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK, true, pi->ir_data); @@ -1060,14 +1061,26 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) */ spin_lock_irqsave(&svm->ir_list_lock, flags); - entry = READ_ONCE(*(svm->avic_physical_id_cache)); + entry = svm->avic_physical_id_entry; WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; + svm->avic_physical_id_entry = entry; + + /* + * If IPI virtualization is disabled, clear IsRunning when updating the + * actual Physical ID table, so that the CPU never sees IsRunning=1. + * Keep the APIC ID up-to-date in the entry to minimize the chances of + * things going sideways if hardware peeks at the ID. + */ + if (!enable_ipiv) + entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; + WRITE_ONCE(*(svm->avic_physical_id_cache), entry); + avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); spin_unlock_irqrestore(&svm->ir_list_lock, flags); @@ -1088,7 +1101,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) * can't be scheduled out and thus avic_vcpu_{put,load}() can't run * recursively. */ - entry = READ_ONCE(*(svm->avic_physical_id_cache)); + entry = svm->avic_physical_id_entry; /* Nothing to do if IsRunning == '0' due to vCPU blocking. 
*/ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) @@ -1107,7 +1120,10 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) avic_update_iommu_vcpu_affinity(vcpu, -1, 0); entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; - WRITE_ONCE(*(svm->avic_physical_id_cache), entry); + svm->avic_physical_id_entry = entry; + + if (enable_ipiv) + WRITE_ONCE(*(svm->avic_physical_id_cache), entry); spin_unlock_irqrestore(&svm->ir_list_lock, flags); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8cd8771cdae1..0d5a94570dc9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -232,6 +232,7 @@ module_param(tsc_scaling, int, 0444); */ static bool avic; module_param(avic, bool, 0444); +module_param(enable_ipiv, bool, 0444); bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); @@ -5525,6 +5526,7 @@ static __init int svm_hardware_setup(void) enable_apicv = avic = avic && avic_hardware_setup(); if (!enable_apicv) { + enable_ipiv = false; svm_x86_ops.vcpu_blocking = NULL; svm_x86_ops.vcpu_unblocking = NULL; svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 091a6056dfca..a18d403459e0 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -297,6 +297,14 @@ struct vcpu_svm { struct page *avic_backing_page; u64 *avic_physical_id_cache; + /* This is essentially a shadow of the vCPU's actual entry in the + * Physical ID table that is programmed into the VMCB, i.e. that is + * seen by the CPU. If IPI virtualization is disabled, IsRunning is + * only ever set in the shadow, i.e. is never propagated to the "real" + * table, so that hardware never sees IsRunning=1. 
+ */ + u64 avic_physical_id_entry; + /* * Per-vcpu list of struct amd_svm_iommu_ir: * This is used mainly to store interrupt remapping information used -- Gitee From c86794d2da504c9b46970289cba2ae3824fcd90b Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 11 Jun 2025 15:45:21 -0700 Subject: [PATCH 3/4] KVM: SVM: Disable (x2)AVIC IPI virtualization if CPU has erratum #1235 commit 8de4a1c8164e5b2e40d1df764840a31de983f40b upstream. Disable IPI virtualization on AMD Family 17h CPUs (Zen2 and Zen1), as hardware doesn't reliably detect changes to the 'IsRunning' bit during ICR write emulation, and might fail to VM-Exit on the sending vCPU, if IsRunning was recently cleared. The absence of the VM-Exit leads to KVM not waking (or triggering nested VM-Exit of) the target vCPU(s) of the IPI, which can lead to hung vCPUs, unbounded delays in L2 execution, etc. To work around the erratum, simply disable IPI virtualization, which prevents KVM from setting IsRunning and thus eliminates the race where hardware sees a stale IsRunning=1. As a result, all ICR writes (except when "Self" shorthand is used) will VM-Exit and therefore be correctly emulated by KVM. Disabling IPI virtualization does carry a performance penalty, but benchmarking shows that enabling AVIC without IPI virtualization is still much better than not using AVIC at all, because AVIC still accelerates posted interrupts and the receiving end of the IPIs. Note, when virtualizing Self-IPIs, the CPU skips reading the physical ID table and updates the vIRR directly (because the vCPU is by definition actively running), i.e. Self-IPI isn't susceptible to the erratum *and* is still accelerated by hardware. 
Hygon-SIG: upstream commit 8de4a1c8164e ("KVM: SVM: Disable (x2)AVIC IPI virtualization if CPU has erratum #1235") Signed-off-by: Maxim Levitsky [sean: rebase, massage changelog, disallow user override] Acked-by: Naveen N Rao (AMD) Link: https://lore.kernel.org/r/20250611224604.313496-20-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Tina Zhang --- arch/x86/kvm/svm/avic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 0cdea3f2e7b9..e01ef8f13045 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1241,6 +1241,14 @@ bool avic_hardware_setup(void) if (x2avic_enabled) pr_info("x2AVIC enabled\n"); + /* + * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) + * due to erratum 1235, which results in missed VM-Exits on the sender + * and thus missed wake events for blocking vCPUs due to the CPU + * failing to see a software update to clear IsRunning. + */ + enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17; + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); return true; -- Gitee From 3331adee897bf31455de76f41e207f94328f3f15 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 22 May 2026 12:00:14 +0800 Subject: [PATCH 4/4] KVM: SVM: Disable AVIC IPI virtualization on Hygon Family 18h (erratum #1235) commit 9a12fa5213cfc391e0eed63902d3be98f0913765 upstream. Hygon Family 18h CPUs are derived from AMD Family 17h (Zen1) silicon and share the same erratum #1235: hardware may read a stale IsRunning=1 bit during ICR write emulation and silently fail to generate an AVIC_IPI_FAILURE_TARGET_NOT_RUNNING VM-Exit on the sending vCPU. The absence of the VM-Exit causes KVM to miss the required wakeup of blocking target vCPUs, leading to hung vCPUs and unbounded delays in guest execution. Extend the existing AMD Family 17h erratum #1235 workaround to also cover Hygon Family 18h. 
With IPI virtualization disabled, KVM never sets IsRunning=1 in the Physical ID table, so every non-self IPI generates a VM-Exit and is correctly emulated. Hygon-SIG: upstream commit 9a12fa5213cf ("KVM: SVM: Disable AVIC IPI virtualization on Hygon Family 18h (erratum #1235)") Fixes: 8de4a1c8164e ("KVM: SVM: Disable (x2)AVIC IPI virtualization if CPU has erratum #1235") Cc: Signed-off-by: Tina Zhang Message-ID: <20260522040014.3380201-1-zhang_wei@open-hieco.net> Signed-off-by: Tina Zhang Tested-by: Yongwei Xu --- arch/x86/kvm/svm/avic.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index e01ef8f13045..4ae2340122bd 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1242,12 +1242,14 @@ bool avic_hardware_setup(void) pr_info("x2AVIC enabled\n"); /* - * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) - * due to erratum 1235, which results in missed VM-Exits on the sender - * and thus missed wake events for blocking vCPUs due to the CPU - * failing to see a software update to clear IsRunning. + * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and + * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235, + * which results in missed VM-Exits on the sender and thus missed wake + * events for blocking vCPUs due to the CPU failing to see a software + * update to clear IsRunning. */ - enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17; + if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) + enable_ipiv = false; amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); -- Gitee