From 41456b77faad77bddcddd430c54452fe1fee5132 Mon Sep 17 00:00:00 2001 From: LeoLiu-oc Date: Fri, 6 Mar 2026 17:42:10 +0800 Subject: [PATCH 1/3] Revert the HOST and KVM ZXPAUSE patches zhaoxin inclusion category: feature ------------------- The original zxpause instruction has been updated with the official name pauseopt. Moreover, the original patches have been optimized and updated. Therefore, the original patches need to be withdrawn so that they can be replaced with the new ones later. Signed-off-by: LeoLiu-oc --- arch/x86/include/asm/cpufeature.h | 7 +- arch/x86/include/asm/cpufeatures.h | 5 +- arch/x86/include/asm/delay.h | 1 - arch/x86/include/asm/disabled-features.h | 3 +- arch/x86/include/asm/msr-index.h | 18 -- arch/x86/include/asm/mwait.h | 21 -- arch/x86/include/asm/required-features.h | 3 +- arch/x86/include/asm/vmx.h | 8 - arch/x86/include/asm/vmxfeatures.h | 10 +- arch/x86/kernel/cpu/Makefile | 1 - arch/x86/kernel/cpu/centaur.c | 3 - arch/x86/kernel/cpu/feat_ctl.c | 8 - arch/x86/kernel/cpu/zhaoxin.c | 3 - arch/x86/kernel/cpu/zxpause.c | 238 ----------------------- arch/x86/kernel/time.c | 3 - arch/x86/kvm/cpuid.c | 13 +- arch/x86/kvm/reverse_cpuid.h | 1 - arch/x86/kvm/vmx/capabilities.h | 7 - arch/x86/kvm/vmx/vmcs.h | 2 - arch/x86/kvm/vmx/vmx.c | 66 +------ arch/x86/kvm/vmx/vmx.h | 19 -- arch/x86/kvm/x86.c | 5 - arch/x86/lib/delay.c | 27 --- tools/arch/x86/include/asm/cpufeatures.h | 5 +- tools/arch/x86/include/asm/msr-index.h | 13 -- 25 files changed, 12 insertions(+), 478 deletions(-) delete mode 100644 arch/x86/kernel/cpu/zxpause.c diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0193a9d8c2be..e7afc9c36bb2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -49,7 +49,6 @@ enum cpuid_leafs * CPUID_C000_0006_EAX must keep at the 30th position (count from 0)! * NR_CPUID_WORDS can not bigger than 31 (the cpuid_leafs only 32 bits!). 
*/ - CPUID_C000_0006_EAX, /* 30 */ NR_CPUID_WORDS, }; @@ -118,9 +117,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 27, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 28, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 29, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 30, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 31)) + BUILD_BUG_ON_ZERO(NCAPINTS != 30)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -153,9 +151,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 27, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 28, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 29, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 30, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 31)) + BUILD_BUG_ON_ZERO(NCAPINTS != 30)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c06a62851611..c67d87eaac22 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 31 /* N 32-bit words worth of info */ +#define NCAPINTS 30 /* N 32-bit words worth of info */ #define NBUGINTS 4 /* N 32-bit bug flags */ /* @@ -494,9 +494,6 @@ #define X86_FEATURE_HYGON_SM3 (29*32 + 1) /* "sm3" SM3 instructions */ #define X86_FEATURE_HYGON_SM4 (29*32 + 2) /* "sm4" SM4 instructions */ -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 21 */ -#define X86_FEATURE_ZXPAUSE (30*32 + 0) /* ZHAOXIN ZXPAUSE */ - /* * Extended auxiliary flags: Linux defined - for features scattered in various diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 4dbb3fea67fb..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -7,7 +7,6 @@ void __init use_tsc_delay(void); void __init use_tpause_delay(void); -void __init use_zxpause_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c7f91989a16f..5082faf136be 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -158,7 +158,6 @@ #define DISABLED_MASK27 0 #define DISABLED_MASK28 0 #define DISABLED_MASK29 0 -#define DISABLED_MASK30 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 31) +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 30) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 334a284615ef..11ab6780d160 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -75,23 +75,12 @@ #define MSR_IA32_UMWAIT_CONTROL 0xe1 #define 
MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) #define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) - -#define MSR_ZX_PAUSE_CONTROL 0x187f -#define MSR_ZX_PAUSE_CONTROL_C02_DISABLE BIT(0) -#define MSR_ZX_PAUSE_CONTROL_RESERVED BIT(1) - /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) -/* - * The time field is bit[31:2], but representing a 32bit value with - * bit[1:0] zero. - */ -#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) - /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 @@ -836,13 +825,6 @@ #define MSR_VIA_RNG 0x0000110b #define MSR_VIA_BCR2 0x00001147 -/* - * Zhaoxin extend VMCS capabilities: - * bit 0: exec-cntl3 VMCS field. - */ -#define MSR_ZX_EXT_VMCS_CAPS 0x1675 -#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0) - /* Transmeta defined MSRs */ #define MSR_TMTA_LONGRUN_CTRL 0x80868010 #define MSR_TMTA_LONGRUN_FLAGS 0x80868011 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bb293f9f9a2c..ae7a83e3f743 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,8 +26,6 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -#define ZXPAUSE_C01_STATE 1 - static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { @@ -153,23 +151,4 @@ static inline void __tpause(u32 ecx, u32 edx, u32 eax) #endif } -/* - * Caller can specify whether to enter C0.1 (low latency, less - * power saving) or C0.2 state (saves more power, but longer wakeup - * latency). This may be overridden by the ZX_PAUSE_CONTROL MSR - * which can force requests for C0.2 to be downgraded to C0.1. 
- */ -static inline void __zxpause(u32 ecx, u32 edx, u32 eax) -{ - /* "zxpause %ecx, %edx, %eax;" */ - #ifdef CONFIG_AS_ZXPAUSE - asm volatile("zxpause %%ecx\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); - #else - asm volatile(".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); - #endif -} #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 9abee54484e6..5d2017c24101 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -108,7 +108,6 @@ #define REQUIRED_MASK27 0 #define REQUIRED_MASK28 0 #define REQUIRED_MASK29 0 -#define REQUIRED_MASK30 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 31) +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 30) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 32dc7414b83b..0e73616b82f3 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -84,12 +84,6 @@ */ #define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT) -/* - * Definitions of Zhaoxin Tertiary Processor-Based VM-Execution Controls. 
- */ -#define ZX_TERTIARY_EXEC_GUEST_ZXPAUSE VMCS_CONTROL_BIT(GUEST_ZXPAUSE) - - #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) #define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) @@ -241,7 +235,6 @@ enum vmcs_field { TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035, PID_POINTER_TABLE = 0x00002042, PID_POINTER_TABLE_HIGH = 0x00002043, - ZXPAUSE_VMEXIT_TSC = 0x00002200, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -291,7 +284,6 @@ enum vmcs_field { PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, NOTIFY_WINDOW = 0x00004024, - ZX_TERTIARY_VM_EXEC_CONTROL = 0x00004200, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index ff4b573dd191..c6a7eed03914 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -5,7 +5,7 @@ /* * Defines VMX CPU feature bits */ -#define NVMXINTS 6 /* N 32-bit words worth of info */ +#define NVMXINTS 5 /* N 32-bit words worth of info */ /* * Note: If the comment begins with a quoted string, that string is used @@ -87,10 +87,6 @@ #define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ #define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */ -/* Zhaoxin Tertiary Processor-Based VM-Execution Controls, word 3 */ -#define VMX_FEATURE_GUEST_ZXPAUSE (3*32 + 0) /* zxpause instruction in guest mode */ - -/* Tertiary Processor-Based VM-Execution Controls, word 4 */ -#define VMX_FEATURE_IPI_VIRT (4*32 + 4) /* Enable IPI virtualization */ - +/* Tertiary Processor-Based VM-Execution Controls, word 3 */ +#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/kernel/cpu/Makefile 
b/arch/x86/kernel/cpu/Makefile index 09bb2e72b7a3..eb4dbcdf41f1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -26,7 +26,6 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y += umwait.o -obj-y += zxpause.o obj-y += capflags.o powerflags.o obj-$(CONFIG_X86_LOCAL_APIC) += topology.o diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 09e3d7f57204..5bd0fab944db 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -116,9 +116,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) */ if (c->x86 == 0x6 || (c->x86 == 0x7 && c->x86_model <= 0x3b)) set_cpu_cap(c, X86_FEATURE_CRC32C_LOW_PERF); - - if (cpuid_eax(0xC0000000) >= 0xC0000006) - c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index f16f19b06527..03851240c3e3 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -15,7 +15,6 @@ enum vmx_feature_leafs { MISC_FEATURES = 0, PRIMARY_CTLS, SECONDARY_CTLS, - ZX_TERTIARY_CTLS, TERTIARY_CTLS_LOW, TERTIARY_CTLS_HIGH, NR_VMX_FEATURE_WORDS, @@ -98,13 +97,6 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EPT_AD); if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID)) set_cpu_cap(c, X86_FEATURE_VPID); - /* - * Initialize Zhaoxin Tertiary Exec Control feature flags. 
- */ - rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &supported, &ign); - if (supported & MSR_ZX_VMCS_EXEC_CTL3) - c->vmx_capability[ZX_TERTIARY_CTLS] |= VMX_F(GUEST_ZXPAUSE); - } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 3180e2026c40..6efe3c0aafda 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -72,9 +72,6 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) */ if (c->x86 == 0x6 || (c->x86 == 0x7 && c->x86_model <= 0x3b)) set_cpu_cap(c, X86_FEATURE_CRC32C_LOW_PERF); - - if (cpuid_eax(0xC0000000) >= 0xC0000006) - c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); } static void init_zhaoxin(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/zxpause.c b/arch/x86/kernel/cpu/zxpause.c deleted file mode 100644 index 7f55f5d9e8c0..000000000000 --- a/arch/x86/kernel/cpu/zxpause.c +++ /dev/null @@ -1,238 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include - -#include -#include - -#define ZXPAUSE_C02_ENABLE 0 - -#define ZXPAUSE_CTRL_VAL(max_time, c02_disable) \ - (((max_time) & MSR_ZX_PAUSE_CONTROL_TIME_MASK) | \ - ((c02_disable) & MSR_ZX_PAUSE_CONTROL_C02_DISABLE)) - -/* - * Cache ZX_PAUSE_CONTROL MSR. This is a systemwide control. By default, - * zxpause max time is 100000 in TSC-quanta and C0.2 is enabled - */ -static u32 zxpause_control_cached = ZXPAUSE_CTRL_VAL(100000, ZXPAUSE_C02_ENABLE); - -/* - * Cache the original ZX_PAUSE_CONTROL MSR value which is configured by - * hardware or BIOS before kernel boot. - */ -static u32 orig_zxpause_control_cached __ro_after_init; - -/* - * Serialize access to zxpause_control_cached and ZX_PAUSE_CONTROL MSR in - * the sysfs write functions. 
- */ -static DEFINE_MUTEX(zxpause_lock); - -static void zxpause_update_control_msr(void *unused) -{ - lockdep_assert_irqs_disabled(); - wrmsr(MSR_ZX_PAUSE_CONTROL, READ_ONCE(zxpause_control_cached), 0); -} - -/* - * The CPU hotplug callback sets the control MSR to the global control - * value. - * - * Disable interrupts so the read of zxpause_control_cached and the WRMSR - * are protected against a concurrent sysfs write. Otherwise the sysfs - * write could update the cached value after it had been read on this CPU - * and issue the IPI before the old value had been written. The IPI would - * interrupt, write the new value and after return from IPI the previous - * value would be written by this CPU. - * - * With interrupts disabled the upcoming CPU either sees the new control - * value or the IPI is updating this CPU to the new control value after - * interrupts have been reenabled. - */ -static int zxpause_cpu_online(unsigned int cpu) -{ - local_irq_disable(); - zxpause_update_control_msr(NULL); - local_irq_enable(); - return 0; -} - -/* - * The CPU hotplug callback sets the control MSR to the original control - * value. - */ -static int zxpause_cpu_offline(unsigned int cpu) -{ - /* - * This code is protected by the CPU hotplug already and - * orig_zxpause_control_cached is never changed after it caches - * the original control MSR value in zxpause_init(). So there - * is no race condition here. - */ - wrmsr(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached, 0); - - return 0; -} - -/* - * On resume, restore ZX_PAUSE_CONTROL MSR on the boot processor which - * is the only active CPU at this time. The MSR is set up on the APs via the - * CPU hotplug callback. - * - * This function is invoked on resume from suspend and hibernation. On - * resume from suspend the restore should be not required, but we neither - * trust the firmware nor does it matter if the same value is written - * again. 
- */ -static void zxpause_syscore_resume(void) -{ - zxpause_update_control_msr(NULL); -} - -static struct syscore_ops zxpause_syscore_ops = { - .resume = zxpause_syscore_resume, -}; - -/* sysfs interface */ - -/* - * When bit 0 in ZX_PAUSE_CONTROL MSR is 1, C0.2 is disabled. - * Otherwise, C0.2 is enabled. - */ -static inline bool zxpause_ctrl_c02_enabled(u32 ctrl) -{ - return !(ctrl & MSR_ZX_PAUSE_CONTROL_C02_DISABLE); -} - -static inline u32 zxpause_ctrl_max_time(u32 ctrl) -{ - return ctrl & MSR_ZX_PAUSE_CONTROL_TIME_MASK; -} - -static inline void zxpause_update_control(u32 maxtime, bool c02_enable) -{ - u32 ctrl = maxtime & MSR_ZX_PAUSE_CONTROL_TIME_MASK; - - if (!c02_enable) - ctrl |= MSR_ZX_PAUSE_CONTROL_C02_DISABLE; - - WRITE_ONCE(zxpause_control_cached, ctrl); - /* Propagate to all CPUs */ - on_each_cpu(zxpause_update_control_msr, NULL, 1); -} - -static ssize_t -enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - u32 ctrl = READ_ONCE(zxpause_control_cached); - - return sprintf(buf, "%d\n", zxpause_ctrl_c02_enabled(ctrl)); -} - -static ssize_t enable_c02_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - bool c02_enable; - u32 ctrl; - int ret; - - ret = kstrtobool(buf, &c02_enable); - if (ret) - return ret; - - mutex_lock(&zxpause_lock); - - ctrl = READ_ONCE(zxpause_control_cached); - if (c02_enable != zxpause_ctrl_c02_enabled(ctrl)) - zxpause_update_control(ctrl, c02_enable); - - mutex_unlock(&zxpause_lock); - - return count; -} -static DEVICE_ATTR_RW(enable_c02); - -static ssize_t -max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) -{ - u32 ctrl = READ_ONCE(zxpause_control_cached); - - return sprintf(buf, "%u\n", zxpause_ctrl_max_time(ctrl)); -} - -static ssize_t max_time_store(struct device *kobj, - struct device_attribute *attr, - const char *buf, size_t count) -{ - u32 max_time, ctrl; - int ret; - - ret = kstrtou32(buf, 0, &max_time); - if (ret) - 
return ret; - - /* bits[1:0] must be zero */ - if (max_time & ~MSR_ZX_PAUSE_CONTROL_TIME_MASK) - return -EINVAL; - - mutex_lock(&zxpause_lock); - - ctrl = READ_ONCE(zxpause_control_cached); - if (max_time != zxpause_ctrl_max_time(ctrl)) - zxpause_update_control(max_time, zxpause_ctrl_c02_enabled(ctrl)); - - mutex_unlock(&zxpause_lock); - - return count; -} -static DEVICE_ATTR_RW(max_time); - -static struct attribute *zxpause_attrs[] = { - &dev_attr_enable_c02.attr, - &dev_attr_max_time.attr, - NULL -}; - -static struct attribute_group zxpause_attr_group = { - .attrs = zxpause_attrs, - .name = "zxpause_control", -}; - -static int __init zxpause_init(void) -{ - struct device *dev; - int ret; - - if (!boot_cpu_has(X86_FEATURE_ZXPAUSE)) - return -ENODEV; - - /* - * Cache the original control MSR value before the control MSR is - * changed. This is the only place where orig_zxpause_control_cached - * is modified. - */ - rdmsrl(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached); - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "zxpause:online", - zxpause_cpu_online, zxpause_cpu_offline); - if (ret < 0) { - /* - * On failure, the control MSR on all CPUs has the - * original control value. - */ - return ret; - } - - register_syscore_ops(&zxpause_syscore_ops); - - /* - * Add zxpause control interface. Ignore failure, so at least the - * default values are set up in case the machine manages to boot. 
- */ - dev = bus_get_dev_root(&cpu_subsys); - return sysfs_create_group(&dev->kobj, &zxpause_attr_group); -} -device_initcall(zxpause_init); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 6a6c8bd7843c..52e1f3f0b361 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -84,9 +84,6 @@ static __init void x86_late_time_init(void) if (static_cpu_has(X86_FEATURE_WAITPKG)) use_tpause_delay(); - - if (static_cpu_has(X86_FEATURE_ZXPAUSE)) - use_zxpause_delay(); } /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 697cdc1c57ad..c9f27a4df96f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -853,10 +853,6 @@ void kvm_set_cpu_caps(void) F(PMM) | F(PMM_EN) ); - /* Zhaoxin 0xC0000006 leaf */ - kvm_cpu_cap_mask(CPUID_C000_0006_EAX, 0 /* bit0: zxpause */ | 0 /* bit1 HMAC */); - - /* * Hide RDTSCP and RDPID if either feature is reported as supported but * probing MSR_TSC_AUX failed. This is purely a sanity check and @@ -1416,22 +1412,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: - /* Extended to 0xC0000006 */ - entry->eax = min(entry->eax, 0xC0000006); + /*Just support up to 0xC0000004 now*/ + entry->eax = min(entry->eax, 0xC0000004); break; case 0xC0000001: cpuid_entry_override(entry, CPUID_C000_0001_EDX); break; - case 0xC0000006: - cpuid_entry_override(entry, CPUID_C000_0006_EAX); - break; - case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ case 0xC0000002: case 0xC0000003: case 0xC0000004: - case 0xC0000005: default: entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 867d8ee96c50..74ef96cb2752 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -104,7 +104,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 
7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, - [CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX}, [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 631e65a21228..41a4533f9989 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -60,7 +60,6 @@ struct vmcs_config { u32 pin_based_exec_ctrl; u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; - u32 zx_cpu_based_3rd_exec_ctrl; u64 cpu_based_3rd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; @@ -256,12 +255,6 @@ static inline bool cpu_has_vmx_xsaves(void) SECONDARY_EXEC_ENABLE_XSAVES; } -static inline bool cpu_has_vmx_zxpause(void) -{ - return vmcs_config.zx_cpu_based_3rd_exec_ctrl & - ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; -} - static inline bool cpu_has_vmx_waitpkg(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 4eabed8e5813..7c1996b433e2 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -50,9 +50,7 @@ struct vmcs_controls_shadow { u32 pin; u32 exec; u32 secondary_exec; - u32 zx_tertiary_exec; u64 tertiary_exec; - u64 zx_vmexit_tsc; }; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8725a059c4c6..247bdad01cda 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -220,7 +220,6 @@ int __read_mostly pt_mode = PT_MODE_SYSTEM; module_param(pt_mode, int, S_IRUGO); #endif -static u32 zx_ext_vmcs_cap; struct x86_pmu_lbr __ro_after_init vmx_lbr_caps; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); @@ -2021,11 +2020,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UMWAIT_CONTROL: if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) return 1; - msr_info->data = vmx->msr_ia32_umwait_control; - break; - case MSR_ZX_PAUSE_CONTROL: - if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx)) - return 1; + 
msr_info->data = vmx->msr_ia32_umwait_control; break; case MSR_IA32_SPEC_CTRL: @@ -2294,15 +2289,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* The reserved bit 1 and non-32 bit [63:32] should be zero */ if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) return 1; - vmx->msr_ia32_umwait_control = data; - break; - case MSR_ZX_PAUSE_CONTROL: - if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx)) - return 1; - /* The reserved bit 1 and non-32 bit [63:32] should be zero */ - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) - return 1; vmx->msr_ia32_umwait_control = data; break; case MSR_IA32_SPEC_CTRL: @@ -2758,10 +2745,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->vmentry_ctrl = _vmentry_control; vmcs_conf->misc = misc_msr; - /* Setup Zhaoxin exec-cntl3 VMCS field. */ - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) - vmcs_conf->zx_cpu_based_3rd_exec_ctrl |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - #if IS_ENABLED(CONFIG_HYPERV) if (enlightened_vmcs) evmcs_sanitize_exec_ctrls(vmcs_conf); @@ -4562,28 +4545,6 @@ static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static u32 vmx_zx_tertiary_exec_control(struct vcpu_vmx *vmx) -{ - struct kvm_vcpu *vcpu = &vmx->vcpu; - u32 exec_control = vmcs_config.zx_cpu_based_3rd_exec_ctrl; - - /* - * Show errors if Qemu wants to enable guest_zxpause while - * vmx not support it. - */ - if (guest_cpuid_has(vcpu, X86_FEATURE_ZXPAUSE)) { - if (!cpu_has_vmx_zxpause()) - pr_err("VMX not support guest_zxpause!\n"); - else - exec_control |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - } else - exec_control &= ~ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - - /* enable other features here */ - - return exec_control; -} - /* * Adjust a single secondary execution control bit to intercept/allow an * instruction in the guest. 
This is usually done based on whether or not a @@ -4791,11 +4752,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (cpu_has_secondary_exec_ctrls()) secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx)); - zx_vmexit_tsc_controls_set(vmx, 0); - } - if (cpu_has_tertiary_exec_ctrls()) tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx)); @@ -6357,13 +6313,6 @@ void dump_vmcs(struct kvm_vcpu *vcpu) else tertiary_exec_control = 0; - pr_err("*** Zhaoxin Specific Fields ***\n"); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - pr_err("Zhaoxin TertiaryExec Cntl = 0x%016x\n", - vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL)); - pr_err("ZXPAUSE Saved TSC = 0x%016llx\n", vmcs_read64(ZXPAUSE_VMEXIT_TSC)); - } - pr_err("VMCS %p, last attempted VM-entry on CPU %d\n", vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu); pr_err("*** Guest State ***\n"); @@ -7919,11 +7868,6 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmcs_set_secondary_exec_control(vmx, vmx_secondary_exec_control(vmx)); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx)); - zx_vmexit_tsc_controls_set(vmx, 0); - } - if (guest_can_use(vcpu, X86_FEATURE_VMX)) vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX | @@ -8082,10 +8026,6 @@ static __init void vmx_set_cpu_caps(void) if (cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); - - if (cpu_has_vmx_zxpause()) - kvm_cpu_cap_check_and_set(X86_FEATURE_ZXPAUSE); - } static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, @@ -8652,10 +8592,6 @@ static __init int hardware_setup(void) unsigned long host_bndcfgs; struct desc_ptr dt; int r; - u32 ign; - - /* Caches Zhaoxin extend VMCS capabilities. 
*/ - rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign); store_idt(&dt); host_idt_base = dt.address; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7c997b07c85d..30c86e88eb84 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -602,17 +602,6 @@ static inline u8 vmx_get_rvi(void) #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \ (TERTIARY_EXEC_IPI_VIRT) -#define KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL 0 -#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL \ - (ZX_TERTIARY_EXEC_GUEST_ZXPAUSE) - -/* - * We shouldn't rw zxpause_vmexit_tsc vmcs field in this - * way, try to use another way in the future. - */ -#define KVM_REQUIRED_VMX_ZXPAUSE_VMEXIT_TSC 0 -#define KVM_OPTIONAL_VMX_ZXPAUSE_VMEXIT_TSC 1 - #define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ { \ @@ -645,8 +634,6 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) -BUILD_CONTROLS_SHADOW(zx_tertiary_exec, ZX_TERTIARY_VM_EXEC_CONTROL, 32) -BUILD_CONTROLS_SHADOW(zx_vmexit_tsc, ZXPAUSE_VMEXIT_TSC, 64) /* * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the @@ -749,12 +736,6 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } -static inline bool vmx_guest_zxpause_enabled(struct vcpu_vmx *vmx) -{ - return zx_tertiary_exec_controls_get(vmx) & - ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; -} - static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) { if (!enable_ept) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21855275e867..0e6d18764a4e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -345,7 +345,6 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, 
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, - MSR_ZX_PAUSE_CONTROL, MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; @@ -7257,10 +7256,6 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG)) return; break; - case MSR_ZX_PAUSE_CONTROL: - if (!kvm_cpu_cap_has(X86_FEATURE_ZXPAUSE)) - return; - break; case MSR_IA32_RTIT_CTL: case MSR_IA32_RTIT_STATUS: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 3946badbd78f..0e65d00e2339 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -117,27 +117,6 @@ static void delay_halt_tpause(u64 start, u64 cycles) __tpause(TPAUSE_C02_STATE, edx, eax); } -/* - * On ZHAOXIN the ZXPAUSE instruction waits until any of: - * 1) the delta of TSC counter exceeds the value provided in EDX:EAX - * 2) global timeout in ZX_PAUSE_CONTROL is exceeded - * 3) an external interrupt occurs - */ -static void delay_halt_zxpause(u64 unused, u64 cycles) -{ - u64 until = cycles; - u32 eax, edx; - - eax = lower_32_bits(until); - edx = upper_32_bits(until); - - /* - * Hard code the deeper (C0.1) sleep state because exit latency is - * small compared to the "microseconds" that usleep() will delay. - */ - __zxpause(ZXPAUSE_C01_STATE, edx, eax); -} - /* * On some AMD platforms, MWAITX has a configurable 32-bit timer, that * counts with TSC frequency. 
The input value is the number of TSC cycles @@ -204,12 +183,6 @@ void __init use_tpause_delay(void) delay_fn = delay_halt; } -void __init use_zxpause_delay(void) -{ - delay_halt_fn = delay_halt_zxpause; - delay_fn = delay_halt; -} - void use_mwaitx_delay(void) { delay_halt_fn = delay_halt_mwaitx; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b8e6840fb997..5053119fb488 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 31 /* N 32-bit words worth of info */ +#define NCAPINTS 30 /* N 32-bit words worth of info */ #define NBUGINTS 4 /* N 32-bit bug flags */ /* @@ -496,9 +496,6 @@ #define X86_FEATURE_HYGON_SM3 (29*32 + 1) /* "sm3" SM3 instructions */ #define X86_FEATURE_HYGON_SM4 (29*32 + 2) /* "sm4" SM4 instructions */ -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 21 */ -#define X86_FEATURE_ZXPAUSE (30*32 + 0) /* ZHAOXIN ZXPAUSE */ - #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ /* diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 38d71379a781..42e13e6e3980 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -86,12 +86,6 @@ */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) -/* - * The time field is bit[31:2], but representing a 32bit value with - * bit[1:0] zero. - */ -#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) - /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 @@ -835,13 +829,6 @@ #define MSR_VIA_RNG 0x0000110b #define MSR_VIA_BCR2 0x00001147 -/* - * Zhaoxin extend VMCS capabilities: - * bit 0: exec-cntl3 VMCS field. 
- */ -#define MSR_ZX_EXT_VMCS_CAPS 0x1675 -#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0) - /* Transmeta defined MSRs */ #define MSR_TMTA_LONGRUN_CTRL 0x80868010 #define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -- Gitee From 6ca21ee78409eb475e4f4cbb31d1ab993322ff91 Mon Sep 17 00:00:00 2001 From: LeoLiu-oc Date: Fri, 6 Mar 2026 17:47:00 +0800 Subject: [PATCH 2/3] x86/delay: add support for Zhaoxin PAUSEOPT instruction zhaoxin inclusion category: feature ------------------- PAUSEOPT instructs the processor to enter an implementation-dependent optimized state. The instruction execution wakes up when the time-stamp counter reaches or exceeds the implicit EDX:EAX 64-bit input value. The instruction execution also wakes up due to the expiration of the operating system time-limit or by an external interrupt. PAUSEOPT is available on processors with X86_FEATURE_PAUSEOPT. PAUSEOPT allows the processor to enter a light-weight power/performance optimized state (C0.1 state) for a period specified by the instruction or until the system time limit. MSR_ZX_PAUSE_CONTROL MSR register allows the OS to enable/disable C0.2 on the processor and to set the maximum time the processor can reside in C0.1 or C0.2. By default C0.2 is disabled. A sysfs interface to adjust the time and the C0.2 enablement is provided in a follow up change. 
Signed-off-by: LeoLiu-oc --- arch/x86/Kconfig.assembler | 5 + arch/x86/include/asm/cpufeature.h | 7 +- arch/x86/include/asm/cpufeatures.h | 5 +- arch/x86/include/asm/delay.h | 1 + arch/x86/include/asm/disabled-features.h | 3 +- arch/x86/include/asm/msr-index.h | 7 + arch/x86/include/asm/mwait.h | 21 +++ arch/x86/include/asm/required-features.h | 3 +- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/centaur.c | 3 + arch/x86/kernel/cpu/pauseopt.c | 208 +++++++++++++++++++++++ arch/x86/kernel/cpu/zhaoxin.c | 3 + arch/x86/kernel/time.c | 3 + arch/x86/lib/delay.c | 23 +++ tools/arch/x86/include/asm/cpufeatures.h | 5 +- tools/arch/x86/include/asm/msr-index.h | 8 + 16 files changed, 300 insertions(+), 6 deletions(-) create mode 100644 arch/x86/kernel/cpu/pauseopt.c diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 16d0b022d6ff..1195554bb92c 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -29,3 +29,8 @@ config AS_WRUSS def_bool $(as-instr64,wrussq %rax$(comma)(%rbx)) help Supported by binutils >= 2.31 and LLVM integrated assembler + +config AS_PAUSEOPT + def_bool $(as-instr,pauseopt) + help + Supported by binutils >= xxx-TBD and LLVM integrated assembler xxx-TBD diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e7afc9c36bb2..564b678fbbbd 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -49,6 +49,7 @@ enum cpuid_leafs * CPUID_C000_0006_EAX must keep at the 30th position (count from 0)! * NR_CPUID_WORDS can not bigger than 31 (the cpuid_leafs only 32 bits!). 
*/ + CPUID_C000_0006_EAX, NR_CPUID_WORDS, }; @@ -117,8 +118,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 27, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 28, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 29, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 30, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 30)) + BUILD_BUG_ON_ZERO(NCAPINTS != 31)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -151,8 +153,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 27, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 28, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 29, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 30, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 30)) + BUILD_BUG_ON_ZERO(NCAPINTS != 31)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c67d87eaac22..671e82b5ee74 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 30 /* N 32-bit words worth of info */ +#define NCAPINTS 31 /* N 32-bit words worth of info */ #define NBUGINTS 4 /* N 32-bit bug flags */ /* @@ -514,6 +514,9 @@ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +/* Zhaoxin/Centaur-defined CPU features, CPUID level 0xC0000006, word 30 */ +#define X86_FEATURE_PAUSEOPT (30*32+ 0) /* ZHAOXIN PAUSEOPT */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 630891d25819..c844077f19b6 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -7,6 +7,7 @@ void __init use_tsc_delay(void); void __init use_tpause_delay(void); +void __init use_pauseopt_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 5082faf136be..c7f91989a16f 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -158,6 +158,7 @@ #define DISABLED_MASK27 0 #define DISABLED_MASK28 0 #define DISABLED_MASK29 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 30) +#define DISABLED_MASK30 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 31) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 11ab6780d160..62d22086a2d9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -81,6 +81,13 @@ */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +#define 
MSR_PAUSEOPT_CONTROL 0x187f +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_PAUSEOPT_CONTROL_TIME_MASK (~0x03U) + /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index ae7a83e3f743..d480006b6360 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,6 +26,8 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 +#define PAUSEOPT_P01_STATE 1 + static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { @@ -151,4 +153,23 @@ static inline void __tpause(u32 ecx, u32 edx, u32 eax) #endif } +/* + * Caller can specify to enter P0.1 (low latency, less power saving). + */ +static inline void __pauseopt(u32 ecx, u32 edx, u32 eax) +{ + /* "pauseopt %ecx, %edx, %eax;" */ +#ifdef CONFIG_AS_PAUSEOPT + asm volatile( + "pauseopt\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); +#else + asm volatile( + ".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); +#endif +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 5d2017c24101..9abee54484e6 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -108,6 +108,7 @@ #define REQUIRED_MASK27 0 #define REQUIRED_MASK28 0 #define REQUIRED_MASK29 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 30) +#define REQUIRED_MASK30 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 31) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index eb4dbcdf41f1..1acb62a04312 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -26,6 +26,7 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y 
+= umwait.o +obj-y += pauseopt.o obj-y += capflags.o powerflags.o obj-$(CONFIG_X86_LOCAL_APIC) += topology.o diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 5bd0fab944db..09e3d7f57204 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -116,6 +116,9 @@ static void early_init_centaur(struct cpuinfo_x86 *c) */ if (c->x86 == 0x6 || (c->x86 == 0x7 && c->x86_model <= 0x3b)) set_cpu_cap(c, X86_FEATURE_CRC32C_LOW_PERF); + + if (cpuid_eax(0xC0000000) >= 0xC0000006) + c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/pauseopt.c b/arch/x86/kernel/cpu/pauseopt.c new file mode 100644 index 000000000000..58a490da7b3e --- /dev/null +++ b/arch/x86/kernel/cpu/pauseopt.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include +#include + +#define PAUSEOPT_CTRL_VAL(max_time) (((max_time) & MSR_PAUSEOPT_CONTROL_TIME_MASK)) + +/* + * Cache PAUSEOPT_CONTROL MSR. This is a systemwide control. By default, + * pauseopt max time is 100000 in TSC-quanta and P0.1 is enabled. + */ +static u32 pauseopt_control_cached = PAUSEOPT_CTRL_VAL(100000); + +/* + * Cache the original PAUSEOPT_CONTROL MSR value which is configured by + * hardware or BIOS before kernel boot. + */ +static u32 orig_pauseopt_control_cached __ro_after_init; + +/* + * Serialize access to pauseopt_control_cached and PAUSEOPT_CONTROL MSR in + * the sysfs write functions. + */ +static DEFINE_MUTEX(pauseopt_lock); + +static void pauseopt_update_control_msr(void *unused) +{ + lockdep_assert_irqs_disabled(); + wrmsr(MSR_PAUSEOPT_CONTROL, READ_ONCE(pauseopt_control_cached), 0); +} + +/* + * The CPU hotplug callback sets the control MSR to the global control + * value. + * + * Disable interrupts so the read of pauseopt_control_cached and the WRMSR + * are protected against a concurrent sysfs write. 
Otherwise the sysfs + * write could update the cached value after it had been read on this CPU + * and issue the IPI before the old value had been written. The IPI would + * interrupt, write the new value and after return from IPI the previous + * value would be written by this CPU. + * + * With interrupts disabled the upcoming CPU either sees the new control + * value or the IPI is updating this CPU to the new control value after + * interrupts have been reenabled. + */ +static int pauseopt_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + pauseopt_update_control_msr(NULL); + local_irq_enable(); + return 0; +} + +/* + * The CPU hotplug callback sets the control MSR to the original control + * value. + */ +static int pauseopt_cpu_offline(unsigned int cpu) +{ + /* + * This code is protected by the CPU hotplug already and + * orig_pauseopt_control_cached is never changed after it caches + * the original control MSR value in pauseopt_init(). So there + * is no race condition here. + */ + wrmsr(MSR_PAUSEOPT_CONTROL, orig_pauseopt_control_cached, 0); + + return 0; +} + +/* + * On resume, restore PAUSEOPT_CONTROL MSR on the boot processor which + * is the only active CPU at this time. The MSR is set up on the APs via the + * CPU hotplug callback. + * + * This function is invoked on resume from suspend and hibernation. On + * resume from suspend the restore should be not required, but we neither + * trust the firmware nor does it matter if the same value is written + * again. 
+ */ +static void pauseopt_syscore_resume(void) +{ + pauseopt_update_control_msr(NULL); +} + +static struct syscore_ops pauseopt_syscore_ops = { + .resume = pauseopt_syscore_resume, +}; + +/* sysfs interface */ + +static inline u32 pauseopt_ctrl_max_time(u32 ctrl) +{ + return ctrl & MSR_PAUSEOPT_CONTROL_TIME_MASK; +} + +static inline void pauseopt_update_control(u32 maxtime) +{ + u32 ctrl = maxtime & MSR_PAUSEOPT_CONTROL_TIME_MASK; + + WRITE_ONCE(pauseopt_control_cached, ctrl); + /* Propagate to all CPUs */ + on_each_cpu(pauseopt_update_control_msr, NULL, 1); +} + +static ssize_t +enable_p01_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + u32 ret; + + if (boot_cpu_has(X86_FEATURE_PAUSEOPT)) + ret = 1; + else + ret = 0; + + return sprintf(buf, "%d\n", ret); +} +static DEVICE_ATTR_RO(enable_p01); + +static ssize_t +max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(pauseopt_control_cached); + + return sprintf(buf, "%u\n", pauseopt_ctrl_max_time(ctrl)); +} + +static ssize_t max_time_store(struct device *kobj, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 max_time, ctrl; + int ret; + + ret = kstrtou32(buf, 0, &max_time); + if (ret) + return ret; + + /* bits[1:0] must be zero */ + if (max_time & ~MSR_PAUSEOPT_CONTROL_TIME_MASK) + return -EINVAL; + + mutex_lock(&pauseopt_lock); + + ctrl = READ_ONCE(pauseopt_control_cached); + if (max_time != pauseopt_ctrl_max_time(ctrl)) + pauseopt_update_control(max_time); + + mutex_unlock(&pauseopt_lock); + + return count; +} +static DEVICE_ATTR_RW(max_time); + +static struct attribute *pauseopt_attrs[] = { + &dev_attr_enable_p01.attr, + &dev_attr_max_time.attr, + NULL +}; + +static struct attribute_group pauseopt_attr_group = { + .attrs = pauseopt_attrs, + .name = "pauseopt_control", +}; + +static int __init pauseopt_init(void) +{ + struct device *dev; + int ret; + + if (!boot_cpu_has(X86_FEATURE_PAUSEOPT)) + return -ENODEV; + + /* 
+ * Cache the original control MSR value before the control MSR is + * changed. This is the only place where orig_pauseopt_control_cached + * is modified. + */ + rdmsrl(MSR_PAUSEOPT_CONTROL, orig_pauseopt_control_cached); + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pauseopt:online", + pauseopt_cpu_online, pauseopt_cpu_offline); + if (ret < 0) { + /* + * On failure, the control MSR on all CPUs has the + * original control value. + */ + return ret; + } + + register_syscore_ops(&pauseopt_syscore_ops); + + /* + * Add pauseopt control interface. Ignore failure, so at least the + * default values are set up in case the machine manages to boot. + */ + dev = bus_get_dev_root(&cpu_subsys); + if (dev) { + ret = sysfs_create_group(&dev->kobj, &pauseopt_attr_group); + put_device(dev); + } + return ret; +} +device_initcall(pauseopt_init); diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 6efe3c0aafda..3180e2026c40 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -72,6 +72,9 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) */ if (c->x86 == 0x6 || (c->x86 == 0x7 && c->x86_model <= 0x3b)) set_cpu_cap(c, X86_FEATURE_CRC32C_LOW_PERF); + + if (cpuid_eax(0xC0000000) >= 0xC0000006) + c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); } static void init_zhaoxin(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 52e1f3f0b361..99355d27415e 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -84,6 +84,9 @@ static __init void x86_late_time_init(void) if (static_cpu_has(X86_FEATURE_WAITPKG)) use_tpause_delay(); + + if (static_cpu_has(X86_FEATURE_PAUSEOPT)) + use_pauseopt_delay(); } /* diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 0e65d00e2339..96bf5b3baacd 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -117,6 +117,23 @@ static void delay_halt_tpause(u64 start, u64 cycles) __tpause(TPAUSE_C02_STATE, edx, eax); } +/* 
+ * On ZHAOXIN the PAUSEOPT instruction waits until any of: + * 1) the delta of TSC counter exceeds the value provided in EDX:EAX + * 2) global timeout in PAUSEOPT_CONTROL is exceeded + * 3) an external interrupt occurs + */ +static void delay_halt_pauseopt(u64 unused, u64 cycles) +{ + u64 until = cycles; + u32 eax, edx; + + eax = lower_32_bits(until); + edx = upper_32_bits(until); + + __pauseopt(PAUSEOPT_P01_STATE, edx, eax); +} + /* * On some AMD platforms, MWAITX has a configurable 32-bit timer, that * counts with TSC frequency. The input value is the number of TSC cycles @@ -183,6 +200,12 @@ void __init use_tpause_delay(void) delay_fn = delay_halt; } +void __init use_pauseopt_delay(void) +{ + delay_halt_fn = delay_halt_pauseopt; + delay_fn = delay_halt; +} + void use_mwaitx_delay(void) { delay_halt_fn = delay_halt_mwaitx; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 5053119fb488..cd4af761cdde 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 30 /* N 32-bit words worth of info */ +#define NCAPINTS 31 /* N 32-bit words worth of info */ #define NBUGINTS 4 /* N 32-bit bug flags */ /* @@ -496,6 +496,9 @@ #define X86_FEATURE_HYGON_SM3 (29*32 + 1) /* "sm3" SM3 instructions */ #define X86_FEATURE_HYGON_SM4 (29*32 + 2) /* "sm4" SM4 instructions */ +/* Zhaoxin/Centaur-defined CPU features, CPUID level 0xC0000006, word 30 */ +#define X86_FEATURE_PAUSEOPT (30*32+ 0) /* ZHAOXIN PAUSEOPT */ + #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ /* diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 42e13e6e3980..eb2157f552b6 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -86,6 +86,14 @@ */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +#define 
MSR_PAUSEOPT_CONTROL 0x187f + +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_PAUSEOPT_CONTROL_TIME_MASK (~0x03U) + /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 -- Gitee From 5f755ee0f5fe5196fa4f7cf1a052722436d4949f Mon Sep 17 00:00:00 2001 From: LeoLiu-oc Date: Fri, 6 Mar 2026 17:49:03 +0800 Subject: [PATCH 3/3] KVM: x86: Introduce support for Zhaoxin PAUSEOPT instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for the PAUSEOPT instruction, a Zhaoxin-specific counterpart to Intel’s TPAUSE. Two key differences distinguish PAUSEOPT from TPAUSE: 1. PAUSEOPT uses a delta TSC, calculated as the smaller of (MSR_PAUSEOPT_CONTROL[31:2] << 2) and the EDX:EAX input to PAUSEOPT, subtracted from the current TSC. TPAUSE, by contrast, uses a target TSC computed from the smaller of (MSR_IA32_UMWAIT_CONTROL[31:2] << 2) and its EDX:EAX input. 2. PAUSEOPT currently supports only the C0.1 optimization state, whereas TPAUSE may support both C0.1 and C0.2 states. This feature depends on a pending QEMU patch to recognize PAUSEOPT, as well as the preceding patch in this series that adds Linux kernel support for PAUSEOPT. Guest Behavioral notes: - Writes to the PAUSEOPT/TPAUSE CONTROL MSR are ignored (WRMSR is nopped). - Executing PAUSEOPT or TPAUSE succeeds in entering the optimized state for the duration specified by EDX:EAX. - External interrupts and other defined events will break out of this optimized state. - On any VM exit that interrupts PAUSEOPT, such as an external-interrupt VM exit, if the VMM will resume execution at the instruction following PAUSEOPT, the software must clear the PAUSEOPT_TARGET_TSC field in the VMCS. This cleanup is implemented in this patch. 
Signed-off-by: LeoLiu-oc --- arch/x86/include/asm/msr-index.h | 10 ++ arch/x86/include/asm/vmx.h | 7 + arch/x86/include/asm/vmxfeatures.h | 5 +- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kernel/cpu/feat_ctl.c | 28 ++++ arch/x86/kvm/cpuid.c | 14 +- arch/x86/kvm/reverse_cpuid.h | 1 + arch/x86/kvm/vmx/capabilities.h | 12 ++ arch/x86/kvm/vmx/vmcs.h | 1 + arch/x86/kvm/vmx/vmx.c | 205 ++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.h | 21 +++ arch/x86/kvm/x86.c | 5 + tools/arch/x86/include/uapi/asm/vmx.h | 1 + 13 files changed, 308 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 62d22086a2d9..f7d2e6306ea0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -832,6 +832,13 @@ #define MSR_VIA_RNG 0x0000110b #define MSR_VIA_BCR2 0x00001147 +/* + * Zhaoxin extend VMCS capabilities: + * bit 0: exec-cntl3 VMCS field. + */ +#define MSR_ZX_EXT_VMCS_CAPS 0x1675 +#define MSR_ZX_VMCS_EXEC_CTL3_EN BIT(0) + /* Transmeta defined MSRs */ #define MSR_TMTA_LONGRUN_CTRL 0x80868010 #define MSR_TMTA_LONGRUN_FLAGS 0x80868011 @@ -1168,6 +1175,9 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 +/* Zhaoxin VT MSRs */ +#define MSR_ZX_VMX_PROCBASED_CTLS3 0x12A7 + /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 #define VMX_BASIC_TRUE_CTLS (1ULL << 55) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 0e73616b82f3..ebd9e13c5f59 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -84,6 +84,11 @@ */ #define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT) +/* + * Definitions of Zhaoxin Tertiary Processor-Based VM-Execution Controls. 
+ */ +#define ZX_TERTIARY_EXEC_GUEST_PAUSEOPT VMCS_CONTROL_BIT(GUEST_PAUSEOPT) + #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) #define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) @@ -235,6 +240,7 @@ enum vmcs_field { TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035, PID_POINTER_TABLE = 0x00002042, PID_POINTER_TABLE_HIGH = 0x00002043, + PAUSEOPT_TARGET_TSC = 0x00002200, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -284,6 +290,7 @@ enum vmcs_field { PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, NOTIFY_WINDOW = 0x00004024, + ZX_TERTIARY_VM_EXEC_CONTROL = 0x00004200, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index c6a7eed03914..eaafc0fabf61 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -5,7 +5,7 @@ /* * Defines VMX CPU feature bits */ -#define NVMXINTS 5 /* N 32-bit words worth of info */ +#define NVMXINTS 6 /* N 32-bit words worth of info */ /* * Note: If the comment begins with a quoted string, that string is used @@ -89,4 +89,7 @@ /* Tertiary Processor-Based VM-Execution Controls, word 3 */ #define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ + +/* Zhaoxin Tertiary Processor-Based VM-Execution Controls, word 3 */ +#define VMX_FEATURE_GUEST_PAUSEOPT ( 4*32+ 0) /* pauseopt instruction in guest mode */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1b..afab76af3e43 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_PAUSEOPT 68 #define EXIT_REASON_BUS_LOCK 74 #define 
EXIT_REASON_NOTIFY 75 diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 03851240c3e3..964f067f0b2d 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -17,11 +17,37 @@ enum vmx_feature_leafs { SECONDARY_CTLS, TERTIARY_CTLS_LOW, TERTIARY_CTLS_HIGH, + ZX_TERTIARY_CTLS, NR_VMX_FEATURE_WORDS, }; #define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f) +static void init_zhaoxin_ext_capabilities(struct cpuinfo_x86 *c) +{ + u32 ext_vmcs_cap = 0; + u32 proc_based_ctls3_high = 0; + u32 ign, msr_high; + int err; + + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)) + return; + + err = rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &ext_vmcs_cap, &ign); + + if (!(ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3_EN)) + return; + + err = rdmsr_safe(MSR_ZX_VMX_PROCBASED_CTLS3, &ign, &msr_high); + if (!(msr_high & 0x1)) /* CTLS3 MSR doesn't exist */ + proc_based_ctls3_high = 0x1; /* set PAUSEOPT(bit0) */ + else + proc_based_ctls3_high = msr_high; + + c->vmx_capability[ZX_TERTIARY_CTLS] = proc_based_ctls3_high; +} + static void init_vmx_capabilities(struct cpuinfo_x86 *c) { u32 supported, funcs, ept, vpid, ign, low, high; @@ -97,6 +123,8 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EPT_AD); if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID)) set_cpu_cap(c, X86_FEATURE_VPID); + + init_zhaoxin_ext_capabilities(c); } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c9f27a4df96f..b21c921fc9c0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -867,6 +867,12 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); kvm_cpu_cap_clear(X86_FEATURE_RDPID); } + + /* + * Do not hide any features supported by this leaf, allow the guest to see + * the original information. Now leaf 0xC000_0006 EAX only supports PAUSEOPT. 
+ */ + kvm_cpu_cap_mask(CPUID_C000_0006_EAX, F(PAUSEOPT)); } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); @@ -1412,17 +1418,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: - /*Just support up to 0xC0000004 now*/ - entry->eax = min(entry->eax, 0xC0000004); + /* Extended to 0xC0000006 */ + entry->eax = min(entry->eax, 0xC0000006); break; case 0xC0000001: cpuid_entry_override(entry, CPUID_C000_0001_EDX); break; + case 0xC0000006: + cpuid_entry_override(entry, CPUID_C000_0006_EAX); + break; case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ case 0xC0000002: case 0xC0000003: case 0xC0000004: + case 0xC0000005: default: entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 74ef96cb2752..5fe7761bb97c 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -86,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, + [CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX}, [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 41a4533f9989..f287396720a9 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -61,6 +61,7 @@ struct vmcs_config { u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; u64 cpu_based_3rd_exec_ctrl; + u32 zx_cpu_based_3rd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; u64 misc; @@ -138,6 +139,11 @@ static inline bool cpu_has_tertiary_exec_ctrls(void) CPU_BASED_ACTIVATE_TERTIARY_CONTROLS; } +static inline bool cpu_has_zx_tertiary_exec_ctrls(void) +{ + return !!vmcs_config.zx_cpu_based_3rd_exec_ctrl; +} + 
static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -255,6 +261,12 @@ static inline bool cpu_has_vmx_xsaves(void) SECONDARY_EXEC_ENABLE_XSAVES; } +static inline bool cpu_has_vmx_pauseopt(void) +{ + return vmcs_config.zx_cpu_based_3rd_exec_ctrl & + ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; +} + static inline bool cpu_has_vmx_waitpkg(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 7c1996b433e2..0e07394f02dd 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -51,6 +51,7 @@ struct vmcs_controls_shadow { u32 exec; u32 secondary_exec; u64 tertiary_exec; + u32 zx_tertiary_exec; }; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 247bdad01cda..5280e3dd4f51 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1987,6 +1987,24 @@ int vmx_get_feature_msr(u32 msr, u64 *data) } } +static int zx_vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!is_zhaoxin_cpu()) + return KVM_MSR_RET_UNHANDLED; + + switch (msr_info->index) { + case MSR_PAUSEOPT_CONTROL: + if (!msr_info->host_initiated && !vmx_guest_pauseopt_enabled(vmx)) + return 1; + msr_info->data = vmx->msr_pauseopt_control; + return 0; + default: + return KVM_MSR_RET_UNHANDLED; /* Non-zhaoxin MSRs */ + } +} + /* * Reads an msr value (of 'msr_info->index') into 'msr_info->data'. * Returns 0 on success, non-0 otherwise. 
@@ -1997,6 +2015,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; u32 index; + int ret = 0; + + ret = zx_vmx_get_msr(vcpu, msr_info); + switch (ret) { + case 0: + case 1: + return ret; + case KVM_MSR_RET_UNHANDLED: + ret = 0; + break; + } switch (msr_info->index) { #ifdef CONFIG_X86_64 @@ -2182,6 +2211,31 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) return !invalid; } +static int zx_vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 msr_index = msr_info->index; + u64 data = msr_info->data; + + if (!is_zhaoxin_cpu()) + return KVM_MSR_RET_UNHANDLED; + + switch (msr_index) { + case MSR_PAUSEOPT_CONTROL: + if (!msr_info->host_initiated && !vmx_guest_pauseopt_enabled(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_pauseopt_control = data; + return 0; + default: + return KVM_MSR_RET_UNHANDLED; /* Non-zhaoxin MSRs*/ + } +} + /* * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. 
@@ -2196,6 +2250,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 data = msr_info->data; u32 index; + ret = zx_vmx_set_msr(vcpu, msr_info); + switch (ret) { + case 0: + case 1: + return ret; + case KVM_MSR_RET_UNHANDLED: + ret = 0; + break; + } + switch (msr_index) { case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_info); @@ -2586,6 +2650,44 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) return ctl_opt & allowed; } +static int setup_zhaoxin_vmcs_controls(struct vmcs_config *vmcs_conf) +{ + u32 zx_ext_vmcs_cap, msr_high, ign; + u32 zx_ctl3 = 0; + int ret; + + if (!is_zhaoxin_cpu()) + return 0; + + /* + * Zhaoxin uses MSR_ZX_EXT_VMCS_CAPS to enumerate the 3rd CPU-based + * control, rather than a bit in the 2nd CPU-based control. + */ + rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign); + if (!(zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3_EN)) + return 0; + + ret = rdmsr_safe(MSR_ZX_VMX_PROCBASED_CTLS3, &ign, &msr_high); + if (msr_high & 0x1) { + /* ZX CPU with ZX_VMX_PROCBASED_CTLS3 support */ + ret = adjust_vmx_controls(KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL, + KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL, + MSR_ZX_VMX_PROCBASED_CTLS3, &zx_ctl3); + if (ret) + return -EIO; + } else { + /* ZX CPU without ZX_VMX_PROCBASED_CTLS3 support: + * assume PAUSEOPT is supported and set that bit + */ + zx_ctl3 |= ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; + } + + /* Will be extended in the future for more 3rd controls */ + vmcs_conf->zx_cpu_based_3rd_exec_ctrl = zx_ctl3; + + return 0; +} + static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { @@ -2714,6 +2816,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } + if (setup_zhaoxin_vmcs_controls(vmcs_conf)) + return -EIO; + rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. 
*/ @@ -4545,6 +4650,26 @@ static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx) return exec_control; } +/* + * We might need to modify the way the third level control corrections + * are handled here in the future by introducing a check using the + * CTLS3 MSR. The current hardware does not include the design for CTLS3, + * but the designer is attempting to add this MSR implementation + * through ucode. + */ +static u32 zx_vmx_tertiary_exec_control(struct vcpu_vmx *vmx) +{ + struct kvm_vcpu *vcpu = &vmx->vcpu; + u32 exec_control = vmcs_config.zx_cpu_based_3rd_exec_ctrl; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_PAUSEOPT)) + exec_control &= ~ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; + + /* Adjust other features here */ + + return exec_control; +} + /* * Adjust a single secondary execution control bit to intercept/allow an * instruction in the guest. This is usually done based on whether or not a @@ -4731,6 +4856,25 @@ static int vmx_vcpu_precreate(struct kvm *kvm) #define VMX_XSS_EXIT_BITMAP 0 +static void zx_setup_3rd_ctrls(struct vcpu_vmx *vmx) +{ + if (cpu_has_zx_tertiary_exec_ctrls()) { + zx_tertiary_exec_controls_set(vmx, zx_vmx_tertiary_exec_control(vmx)); + /* + * Regardless of whether the guest has PAUSEOPT support or not, + * as long as there is a 3rd control, we need to initialize this + * field to 0 + */ + if (cpu_has_vmx_pauseopt()) + vmcs_write64(PAUSEOPT_TARGET_TSC, 0); + } +} + +static void zx_init_vmcs(struct vcpu_vmx *vmx) +{ + zx_setup_3rd_ctrls(vmx); +} + static void init_vmcs(struct vcpu_vmx *vmx) { struct kvm *kvm = vmx->vcpu.kvm; @@ -4847,6 +4991,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write32(TPR_THRESHOLD, 0); } + zx_init_vmcs(vmx); vmx_setup_uret_msrs(vmx); } @@ -4888,6 +5033,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_pauseopt_control = 0; + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; vmx->msr_ia32_umwait_control = 0; @@ -6315,6 
+6463,18 @@ void dump_vmcs(struct kvm_vcpu *vcpu) pr_err("VMCS %p, last attempted VM-entry on CPU %d\n", vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu); + + if (cpu_has_zx_tertiary_exec_ctrls()) { + /* + * Now zhaoxin only supports specific vmcs fields on 3rd exec control, + * may be extended in the future. + */ + pr_err("*** Zhaoxin Specific Fields ***\n"); + pr_err("Zhaoxin TertiaryExec Cntl = 0x%016x\n", + vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL)); + pr_err("PAUSEOPT Saved TSC = 0x%016llx\n", vmcs_read64(PAUSEOPT_TARGET_TSC)); + } + pr_err("*** Guest State ***\n"); pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), @@ -7362,6 +7522,37 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_state_exit_irqoff(); } +static void zx_vmx_vcpu_run_pre(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long new_rip; + + if (vmx->pauseopt_in_progress) { + new_rip = kvm_rip_read(vcpu); + if (new_rip != vmx->pauseopt_rip) { + /* + * When the execution of PAUSEOPT in the guest is interrupted by + * other events, causing a vmexit, the pauseopt target tsc should be + * cleared to zero before the next vmentry if the guest rip changed, + * to avoid re-entering the pauseopt optimized state after entering the guest. 
+ */ + vmcs_write64(PAUSEOPT_TARGET_TSC, 0); + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; + } + } +} + +static void zx_vmx_vcpu_run_post(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (cpu_has_vmx_pauseopt() && vmcs_read64(PAUSEOPT_TARGET_TSC)) { + vmx->pauseopt_in_progress = true; + vmx->pauseopt_rip = kvm_rip_read(vcpu); + } +} + static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) { bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT; @@ -7408,6 +7599,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + if (is_zhaoxin_cpu()) + zx_vmx_vcpu_run_pre(vcpu); if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); @@ -7513,6 +7706,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) return EXIT_FASTPATH_NONE; vmx->loaded_vmcs->launched = 1; + if (is_zhaoxin_cpu()) + zx_vmx_vcpu_run_post(vcpu); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); @@ -7868,6 +8063,8 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmcs_set_secondary_exec_control(vmx, vmx_secondary_exec_control(vmx)); + zx_setup_3rd_ctrls(vmx); + if (guest_can_use(vcpu, X86_FEATURE_VMX)) vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX | @@ -7978,6 +8175,12 @@ static __init u64 vmx_get_perf_capabilities(void) return perf_cap; } +static void zx_vmx_set_cpu_caps(void) +{ + if (cpu_has_vmx_pauseopt()) + kvm_cpu_cap_check_and_set(X86_FEATURE_PAUSEOPT); +} + static __init void vmx_set_cpu_caps(void) { kvm_set_cpu_caps(); @@ -8026,6 +8229,8 @@ static __init void vmx_set_cpu_caps(void) if (cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); + + zx_vmx_set_cpu_caps(); } static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/vmx.h 
b/arch/x86/kvm/vmx/vmx.h index 30c86e88eb84..2404c7957d7b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -279,6 +279,9 @@ struct vcpu_vmx { u64 spec_ctrl; u32 msr_ia32_umwait_control; + u32 msr_pauseopt_control; + bool pauseopt_in_progress; + unsigned long pauseopt_rip; /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a @@ -602,6 +605,9 @@ static inline u8 vmx_get_rvi(void) #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \ (TERTIARY_EXEC_IPI_VIRT) +#define KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL 0 +#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL (ZX_TERTIARY_EXEC_GUEST_PAUSEOPT) + #define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ { \ @@ -634,6 +640,7 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) +BUILD_CONTROLS_SHADOW(zx_tertiary_exec, ZX_TERTIARY_VM_EXEC_CONTROL, 32) /* * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the @@ -736,6 +743,12 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } +static inline bool vmx_guest_pauseopt_enabled(struct vcpu_vmx *vmx) +{ + return zx_tertiary_exec_controls_get(vmx) & + ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; +} + static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) { if (!enable_ept) @@ -779,4 +792,12 @@ static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu) to_vmx(vcpu)->nested.enlightened_vmcs_enabled; } +static inline bool is_zhaoxin_cpu(void) +{ + /* Now zhaoxin owns 2 x86 vendor brands, Zhaoxin and Centaur */ + return (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR); +} + +#define KVM_MSR_RET_UNHANDLED 2 #endif /* __KVM_X86_VMX_H */ diff --git 
a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0e6d18764a4e..ef8a9684500d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -345,6 +345,7 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, + MSR_PAUSEOPT_CONTROL, MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; @@ -7256,6 +7257,10 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG)) return; break; + case MSR_PAUSEOPT_CONTROL: + if (!kvm_cpu_cap_has(X86_FEATURE_PAUSEOPT)) + return; + break; case MSR_IA32_RTIT_CTL: case MSR_IA32_RTIT_STATUS: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1b..afab76af3e43 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_PAUSEOPT 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 -- Gitee