diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 16d0b022d6fff66ca5c19b403ca098b826f79614..1195554bb92c64ec53d496f15d4c1efec29b615d 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -29,3 +29,8 @@ config AS_WRUSS def_bool $(as-instr64,wrussq %rax$(comma)(%rbx)) help Supported by binutils >= 2.31 and LLVM integrated assembler + +config AS_PAUSEOPT + def_bool $(as-instr,pauseopt) + help + Supported by binutils >= xxx-TBD and LLVM integrated assembler xxx-TBD diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0193a9d8c2beac91f33182da997913ee64bc73b0..564b678fbbbd8551f4c7918f6f4b47b8b6d79913 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -49,7 +49,7 @@ enum cpuid_leafs * CPUID_C000_0006_EAX must keep at the 30th position (count from 0)! * NR_CPUID_WORDS can not bigger than 31 (the cpuid_leafs only 32 bits!). */ - CPUID_C000_0006_EAX, /* 30 */ + CPUID_C000_0006_EAX, NR_CPUID_WORDS, }; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c06a62851611bbbe1a9d1c30875972df79c81890..671e82b5ee7433c62733e40b93777e899ca86e99 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -494,9 +494,6 @@ #define X86_FEATURE_HYGON_SM3 (29*32 + 1) /* "sm3" SM3 instructions */ #define X86_FEATURE_HYGON_SM4 (29*32 + 2) /* "sm4" SM4 instructions */ -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 21 */ -#define X86_FEATURE_ZXPAUSE (30*32 + 0) /* ZHAOXIN ZXPAUSE */ - /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -517,6 +514,9 @@ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +/* Zhaoxin/Centaur-defined CPU features, CPUID level 0xC0000006, word 30 */ +#define 
X86_FEATURE_PAUSEOPT (30*32+ 0) /* ZHAOXIN PAUSEOPT */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 4dbb3fea67fb510c051de76b86b08d322caf3521..c844077f19b6f7bb0e3b81bc790a5fc0e93d4a1d 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -7,7 +7,7 @@ void __init use_tsc_delay(void); void __init use_tpause_delay(void); -void __init use_zxpause_delay(void); +void __init use_pauseopt_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 334a284615efaf2cb1db6c61cfee22078393d6cf..f7d2e6306ea0c738d5f94fae26fbc1c85765086a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -75,22 +75,18 @@ #define MSR_IA32_UMWAIT_CONTROL 0xe1 #define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) #define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) - -#define MSR_ZX_PAUSE_CONTROL 0x187f -#define MSR_ZX_PAUSE_CONTROL_C02_DISABLE BIT(0) -#define MSR_ZX_PAUSE_CONTROL_RESERVED BIT(1) - /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL 0x187f /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ -#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL_TIME_MASK (~0x03U) /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf @@ -838,10 +834,10 @@ /* * Zhaoxin extend VMCS capabilities: - * bit 0: exec-cntl3 VMCS field. + * bit 0: exec-cntl3 VMCS field. 
*/ -#define MSR_ZX_EXT_VMCS_CAPS 0x1675 -#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0) +#define MSR_ZX_EXT_VMCS_CAPS 0x1675 +#define MSR_ZX_VMCS_EXEC_CTL3_EN BIT(0) /* Transmeta defined MSRs */ #define MSR_TMTA_LONGRUN_CTRL 0x80868010 @@ -1179,6 +1175,9 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 +/* Zhaoxin VT MSRs */ +#define MSR_ZX_VMX_PROCBASED_CTLS3 0x12A7 + /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 #define VMX_BASIC_TRUE_CTLS (1ULL << 55) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bb293f9f9a2c8bb0116fb7998e32bec99cad2eef..d480006b6360c31ba03c142faa160383cfce7d32 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,7 +26,7 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -#define ZXPAUSE_C01_STATE 1 +#define PAUSEOPT_P01_STATE 1 static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) @@ -154,22 +154,22 @@ static inline void __tpause(u32 ecx, u32 edx, u32 eax) } /* - * Caller can specify whether to enter C0.1 (low latency, less - * power saving) or C0.2 state (saves more power, but longer wakeup - * latency). This may be overridden by the ZX_PAUSE_CONTROL MSR - * which can force requests for C0.2 to be downgraded to C0.1. + * Caller can specify to enter P0.1 (low latency, less power saving). 
*/ -static inline void __zxpause(u32 ecx, u32 edx, u32 eax) +static inline void __pauseopt(u32 ecx, u32 edx, u32 eax) { - /* "zxpause %ecx, %edx, %eax;" */ - #ifdef CONFIG_AS_ZXPAUSE - asm volatile("zxpause %%ecx\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); - #else - asm volatile(".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); - #endif + /* "pauseopt %ecx, %edx, %eax;" */ +#ifdef CONFIG_AS_PAUSEOPT + asm volatile( + "pauseopt\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); +#else + asm volatile( + ".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); +#endif } + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 32dc7414b83be2a61f494e6e8e4bcdbb0b706eb2..ebd9e13c5f59f6f5043c77c9cc1c9525dad3423a 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -87,8 +87,7 @@ /* * Definitions of Zhaoxin Tertiary Processor-Based VM-Execution Controls. */ -#define ZX_TERTIARY_EXEC_GUEST_ZXPAUSE VMCS_CONTROL_BIT(GUEST_ZXPAUSE) - +#define ZX_TERTIARY_EXEC_GUEST_PAUSEOPT VMCS_CONTROL_BIT(GUEST_PAUSEOPT) #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) @@ -241,7 +240,7 @@ enum vmcs_field { TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035, PID_POINTER_TABLE = 0x00002042, PID_POINTER_TABLE_HIGH = 0x00002043, - ZXPAUSE_VMEXIT_TSC = 0x00002200, + PAUSEOPT_TARGET_TSC = 0x00002200, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -291,7 +290,7 @@ enum vmcs_field { PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, NOTIFY_WINDOW = 0x00004024, - ZX_TERTIARY_VM_EXEC_CONTROL = 0x00004200, + ZX_TERTIARY_VM_EXEC_CONTROL = 0x00004200, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index 
ff4b573dd191b605d107e46e7ae1765b9cbd9153..eaafc0fabf619249302b161d8f853e303b8a5054 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -87,10 +87,9 @@ #define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ #define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */ -/* Zhaoxin Tertiary Processor-Based VM-Execution Controls, word 3 */ -#define VMX_FEATURE_GUEST_ZXPAUSE (3*32 + 0) /* zxpause instruction in guest mode */ - -/* Tertiary Processor-Based VM-Execution Controls, word 4 */ -#define VMX_FEATURE_IPI_VIRT (4*32 + 4) /* Enable IPI virtualization */ +/* Tertiary Processor-Based VM-Execution Controls, word 3 */ +#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ +/* Zhaoxin Tertiary Processor-Based VM-Execution Controls, word 5 */ +#define VMX_FEATURE_GUEST_PAUSEOPT ( 5*32+ 0) /* pauseopt instruction in guest mode */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1bf614a997e120bb364d8c695ab94a..afab76af3e43b79e64843d503b341cb9b880b83f 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_PAUSEOPT 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 09bb2e72b7a3ef74de31a2d8bc4fc2a9e8a6ad3f..1acb62a04312ea5ae00900f7b0d2dc9c19f460fd 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -26,7 +26,7 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y += umwait.o -obj-y += zxpause.o +obj-y += pauseopt.o obj-y += capflags.o powerflags.o obj-$(CONFIG_X86_LOCAL_APIC) += topology.o diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 
f16f19b06527e18d7f85316ab9e2bced62c099cc..964f067f0b2d0f9af2e680e26cb14678aac9f7dc 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -15,14 +15,39 @@ enum vmx_feature_leafs { MISC_FEATURES = 0, PRIMARY_CTLS, SECONDARY_CTLS, - ZX_TERTIARY_CTLS, TERTIARY_CTLS_LOW, TERTIARY_CTLS_HIGH, + ZX_TERTIARY_CTLS, NR_VMX_FEATURE_WORDS, }; #define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f) +static void init_zhaoxin_ext_capabilities(struct cpuinfo_x86 *c) +{ + u32 ext_vmcs_cap = 0; + u32 proc_based_ctls3_high = 0; + u32 ign, msr_high; + int err; + + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)) + return; + + err = rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &ext_vmcs_cap, &ign); + + if (!(ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3_EN)) + return; + + err = rdmsr_safe(MSR_ZX_VMX_PROCBASED_CTLS3, &ign, &msr_high); + if (!(msr_high & 0x1)) /* CTLS3 MSR doesn't exist */ + proc_based_ctls3_high = 0x1; /* set PAUSEOPT(bit0) */ + else + proc_based_ctls3_high = msr_high; + + c->vmx_capability[ZX_TERTIARY_CTLS] = proc_based_ctls3_high; +} + static void init_vmx_capabilities(struct cpuinfo_x86 *c) { u32 supported, funcs, ept, vpid, ign, low, high; @@ -98,13 +123,8 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EPT_AD); if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID)) set_cpu_cap(c, X86_FEATURE_VPID); - /* - * Initialize Zhaoxin Tertiary Exec Control feature flags. 
- */ - rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &supported, &ign); - if (supported & MSR_ZX_VMCS_EXEC_CTL3) - c->vmx_capability[ZX_TERTIARY_CTLS] |= VMX_F(GUEST_ZXPAUSE); + init_zhaoxin_ext_capabilities(c); } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ diff --git a/arch/x86/kernel/cpu/zxpause.c b/arch/x86/kernel/cpu/pauseopt.c similarity index 40% rename from arch/x86/kernel/cpu/zxpause.c rename to arch/x86/kernel/cpu/pauseopt.c index 7f55f5d9e8c0cbb70c30c260705040901eee2a53..58a490da7b3e5e17d890c70b3d38a7ad95158bb3 100644 --- a/arch/x86/kernel/cpu/zxpause.c +++ b/arch/x86/kernel/cpu/pauseopt.c @@ -6,41 +6,37 @@ #include #include -#define ZXPAUSE_C02_ENABLE 0 - -#define ZXPAUSE_CTRL_VAL(max_time, c02_disable) \ - (((max_time) & MSR_ZX_PAUSE_CONTROL_TIME_MASK) | \ - ((c02_disable) & MSR_ZX_PAUSE_CONTROL_C02_DISABLE)) +#define PAUSEOPT_CTRL_VAL(max_time) (((max_time) & MSR_PAUSEOPT_CONTROL_TIME_MASK)) /* - * Cache ZX_PAUSE_CONTROL MSR. This is a systemwide control. By default, - * zxpause max time is 100000 in TSC-quanta and C0.2 is enabled + * Cache PAUSEOPT_CONTROL MSR. This is a systemwide control. By default, + * pauseopt max time is 100000 in TSC-quanta and P0.1 is enabled. */ -static u32 zxpause_control_cached = ZXPAUSE_CTRL_VAL(100000, ZXPAUSE_C02_ENABLE); +static u32 pauseopt_control_cached = PAUSEOPT_CTRL_VAL(100000); /* - * Cache the original ZX_PAUSE_CONTROL MSR value which is configured by + * Cache the original PAUSEOPT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. */ -static u32 orig_zxpause_control_cached __ro_after_init; +static u32 orig_pauseopt_control_cached __ro_after_init; /* - * Serialize access to zxpause_control_cached and ZX_PAUSE_CONTROL MSR in + * Serialize access to pauseopt_control_cached and PAUSEOPT_CONTROL MSR in * the sysfs write functions. 
*/ -static DEFINE_MUTEX(zxpause_lock); +static DEFINE_MUTEX(pauseopt_lock); -static void zxpause_update_control_msr(void *unused) +static void pauseopt_update_control_msr(void *unused) { lockdep_assert_irqs_disabled(); - wrmsr(MSR_ZX_PAUSE_CONTROL, READ_ONCE(zxpause_control_cached), 0); + wrmsr(MSR_PAUSEOPT_CONTROL, READ_ONCE(pauseopt_control_cached), 0); } /* * The CPU hotplug callback sets the control MSR to the global control * value. * - * Disable interrupts so the read of zxpause_control_cached and the WRMSR + * Disable interrupts so the read of pauseopt_control_cached and the WRMSR * are protected against a concurrent sysfs write. Otherwise the sysfs * write could update the cached value after it had been read on this CPU * and issue the IPI before the old value had been written. The IPI would @@ -51,10 +47,10 @@ static void zxpause_update_control_msr(void *unused) * value or the IPI is updating this CPU to the new control value after * interrupts have been reenabled. */ -static int zxpause_cpu_online(unsigned int cpu) +static int pauseopt_cpu_online(unsigned int cpu) { local_irq_disable(); - zxpause_update_control_msr(NULL); + pauseopt_update_control_msr(NULL); local_irq_enable(); return 0; } @@ -63,21 +59,21 @@ static int zxpause_cpu_online(unsigned int cpu) * The CPU hotplug callback sets the control MSR to the original control * value. */ -static int zxpause_cpu_offline(unsigned int cpu) +static int pauseopt_cpu_offline(unsigned int cpu) { /* * This code is protected by the CPU hotplug already and - * orig_zxpause_control_cached is never changed after it caches - * the original control MSR value in zxpause_init(). So there + * orig_pauseopt_control_cached is never changed after it caches + * the original control MSR value in pauseopt_init(). So there * is no race condition here. 
*/ - wrmsr(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached, 0); + wrmsr(MSR_PAUSEOPT_CONTROL, orig_pauseopt_control_cached, 0); return 0; } /* - * On resume, restore ZX_PAUSE_CONTROL MSR on the boot processor which + * On resume, restore PAUSEOPT_CONTROL MSR on the boot processor which * is the only active CPU at this time. The MSR is set up on the APs via the * CPU hotplug callback. * @@ -86,81 +82,51 @@ static int zxpause_cpu_offline(unsigned int cpu) * trust the firmware nor does it matter if the same value is written * again. */ -static void zxpause_syscore_resume(void) +static void pauseopt_syscore_resume(void) { - zxpause_update_control_msr(NULL); + pauseopt_update_control_msr(NULL); } -static struct syscore_ops zxpause_syscore_ops = { - .resume = zxpause_syscore_resume, +static struct syscore_ops pauseopt_syscore_ops = { + .resume = pauseopt_syscore_resume, }; /* sysfs interface */ -/* - * When bit 0 in ZX_PAUSE_CONTROL MSR is 1, C0.2 is disabled. - * Otherwise, C0.2 is enabled. 
- */ -static inline bool zxpause_ctrl_c02_enabled(u32 ctrl) -{ - return !(ctrl & MSR_ZX_PAUSE_CONTROL_C02_DISABLE); -} - -static inline u32 zxpause_ctrl_max_time(u32 ctrl) +static inline u32 pauseopt_ctrl_max_time(u32 ctrl) { - return ctrl & MSR_ZX_PAUSE_CONTROL_TIME_MASK; + return ctrl & MSR_PAUSEOPT_CONTROL_TIME_MASK; } -static inline void zxpause_update_control(u32 maxtime, bool c02_enable) +static inline void pauseopt_update_control(u32 maxtime) { - u32 ctrl = maxtime & MSR_ZX_PAUSE_CONTROL_TIME_MASK; - - if (!c02_enable) - ctrl |= MSR_ZX_PAUSE_CONTROL_C02_DISABLE; + u32 ctrl = maxtime & MSR_PAUSEOPT_CONTROL_TIME_MASK; - WRITE_ONCE(zxpause_control_cached, ctrl); + WRITE_ONCE(pauseopt_control_cached, ctrl); /* Propagate to all CPUs */ - on_each_cpu(zxpause_update_control_msr, NULL, 1); + on_each_cpu(pauseopt_update_control_msr, NULL, 1); } static ssize_t -enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) +enable_p01_show(struct device *dev, struct device_attribute *attr, char *buf) { - u32 ctrl = READ_ONCE(zxpause_control_cached); + u32 ret; - return sprintf(buf, "%d\n", zxpause_ctrl_c02_enabled(ctrl)); -} + if (boot_cpu_has(X86_FEATURE_PAUSEOPT)) + ret = 1; + else + ret = 0; -static ssize_t enable_c02_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - bool c02_enable; - u32 ctrl; - int ret; - - ret = kstrtobool(buf, &c02_enable); - if (ret) - return ret; - - mutex_lock(&zxpause_lock); - - ctrl = READ_ONCE(zxpause_control_cached); - if (c02_enable != zxpause_ctrl_c02_enabled(ctrl)) - zxpause_update_control(ctrl, c02_enable); - - mutex_unlock(&zxpause_lock); - - return count; + return sprintf(buf, "%d\n", ret); } -static DEVICE_ATTR_RW(enable_c02); +static DEVICE_ATTR_RO(enable_p01); static ssize_t max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) { - u32 ctrl = READ_ONCE(zxpause_control_cached); + u32 ctrl = READ_ONCE(pauseopt_control_cached); - return 
sprintf(buf, "%u\n", zxpause_ctrl_max_time(ctrl)); + return sprintf(buf, "%u\n", pauseopt_ctrl_max_time(ctrl)); } static ssize_t max_time_store(struct device *kobj, @@ -175,49 +141,49 @@ static ssize_t max_time_store(struct device *kobj, return ret; /* bits[1:0] must be zero */ - if (max_time & ~MSR_ZX_PAUSE_CONTROL_TIME_MASK) + if (max_time & ~MSR_PAUSEOPT_CONTROL_TIME_MASK) return -EINVAL; - mutex_lock(&zxpause_lock); + mutex_lock(&pauseopt_lock); - ctrl = READ_ONCE(zxpause_control_cached); - if (max_time != zxpause_ctrl_max_time(ctrl)) - zxpause_update_control(max_time, zxpause_ctrl_c02_enabled(ctrl)); + ctrl = READ_ONCE(pauseopt_control_cached); + if (max_time != pauseopt_ctrl_max_time(ctrl)) + pauseopt_update_control(max_time); - mutex_unlock(&zxpause_lock); + mutex_unlock(&pauseopt_lock); return count; } static DEVICE_ATTR_RW(max_time); -static struct attribute *zxpause_attrs[] = { - &dev_attr_enable_c02.attr, +static struct attribute *pauseopt_attrs[] = { + &dev_attr_enable_p01.attr, &dev_attr_max_time.attr, NULL }; -static struct attribute_group zxpause_attr_group = { - .attrs = zxpause_attrs, - .name = "zxpause_control", +static struct attribute_group pauseopt_attr_group = { + .attrs = pauseopt_attrs, + .name = "pauseopt_control", }; -static int __init zxpause_init(void) +static int __init pauseopt_init(void) { struct device *dev; int ret; - if (!boot_cpu_has(X86_FEATURE_ZXPAUSE)) + if (!boot_cpu_has(X86_FEATURE_PAUSEOPT)) return -ENODEV; /* * Cache the original control MSR value before the control MSR is - * changed. This is the only place where orig_zxpause_control_cached + * changed. This is the only place where orig_pauseopt_control_cached * is modified. 
*/ - rdmsrl(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached); + rdmsrl(MSR_PAUSEOPT_CONTROL, orig_pauseopt_control_cached); - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "zxpause:online", - zxpause_cpu_online, zxpause_cpu_offline); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pauseopt:online", + pauseopt_cpu_online, pauseopt_cpu_offline); if (ret < 0) { /* * On failure, the control MSR on all CPUs has the @@ -226,13 +192,17 @@ static int __init zxpause_init(void) return ret; } - register_syscore_ops(&zxpause_syscore_ops); + register_syscore_ops(&pauseopt_syscore_ops); /* - * Add zxpause control interface. Ignore failure, so at least the + * Add pauseopt control interface. Ignore failure, so at least the * default values are set up in case the machine manages to boot. */ dev = bus_get_dev_root(&cpu_subsys); - return sysfs_create_group(&dev->kobj, &zxpause_attr_group); + if (dev) { + ret = sysfs_create_group(&dev->kobj, &pauseopt_attr_group); + put_device(dev); + } + return ret; } -device_initcall(zxpause_init); +device_initcall(pauseopt_init); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 6a6c8bd7843c67c0e30fe3ed8e03921517fc546a..99355d27415ee3b52ba0bb84edc1ae806a298310 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -85,8 +85,8 @@ static __init void x86_late_time_init(void) if (static_cpu_has(X86_FEATURE_WAITPKG)) use_tpause_delay(); - if (static_cpu_has(X86_FEATURE_ZXPAUSE)) - use_zxpause_delay(); + if (static_cpu_has(X86_FEATURE_PAUSEOPT)) + use_pauseopt_delay(); } /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 697cdc1c57ad8892fd1be38628767065d3bbf5e5..b21c921fc9c03573b23af3da2bf13b87e03a4a17 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -853,10 +853,6 @@ void kvm_set_cpu_caps(void) F(PMM) | F(PMM_EN) ); - /* Zhaoxin 0xC0000006 leaf */ - kvm_cpu_cap_mask(CPUID_C000_0006_EAX, 0 /* bit0: zxpause */ | 0 /* bit1 HMAC */); - - /* * Hide RDTSCP and RDPID if either feature is reported as 
supported but * probing MSR_TSC_AUX failed. This is purely a sanity check and @@ -871,6 +867,12 @@ kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); kvm_cpu_cap_clear(X86_FEATURE_RDPID); } + + /* + * Do not hide any features supported by this leaf, allow the guest to see + * the original information. Now leaf 0xC000_0006 EAX only supports PAUSEOPT. + */ + kvm_cpu_cap_mask(CPUID_C000_0006_EAX, F(PAUSEOPT)); } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); @@ -1425,7 +1427,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) case 0xC0000006: cpuid_entry_override(entry, CPUID_C000_0006_EAX); break; - case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ case 0xC0000002: diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 867d8ee96c50f3859fede75237d0df739287ecc7..5fe7761bb97c0a06b537043df5cb4eea8ff06bbc 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -86,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, + [CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX}, [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, @@ -104,7 +105,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, - [CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX}, [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 631e65a212285073924d90df28ea1ee981efe747..f287396720a92a437bdde7bb1841817daa5d6f3c 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -60,8 +60,8 @@ struct vmcs_config { u32 pin_based_exec_ctrl; u32 
cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; - u32 zx_cpu_based_3rd_exec_ctrl; u64 cpu_based_3rd_exec_ctrl; + u32 zx_cpu_based_3rd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; u64 misc; @@ -139,6 +139,11 @@ static inline bool cpu_has_tertiary_exec_ctrls(void) CPU_BASED_ACTIVATE_TERTIARY_CONTROLS; } +static inline bool cpu_has_zx_tertiary_exec_ctrls(void) +{ + return !!vmcs_config.zx_cpu_based_3rd_exec_ctrl; +} + static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -256,10 +261,10 @@ static inline bool cpu_has_vmx_xsaves(void) SECONDARY_EXEC_ENABLE_XSAVES; } -static inline bool cpu_has_vmx_zxpause(void) +static inline bool cpu_has_vmx_pauseopt(void) { return vmcs_config.zx_cpu_based_3rd_exec_ctrl & - ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; + ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; } static inline bool cpu_has_vmx_waitpkg(void) diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 4eabed8e5813af72bac6e9d9377747cdf3a219fa..0e07394f02dd37b210f36ac60ecd6626117b0d26 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -50,9 +50,8 @@ struct vmcs_controls_shadow { u32 pin; u32 exec; u32 secondary_exec; - u32 zx_tertiary_exec; u64 tertiary_exec; - u64 zx_vmexit_tsc; + u32 zx_tertiary_exec; }; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8725a059c4c6de8c4555383b9e2394149c70d24c..5280e3dd4f51f6102888631f7f7829431a1cfbba 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -220,7 +220,6 @@ int __read_mostly pt_mode = PT_MODE_SYSTEM; module_param(pt_mode, int, S_IRUGO); #endif -static u32 zx_ext_vmcs_cap; struct x86_pmu_lbr __ro_after_init vmx_lbr_caps; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); @@ -1988,6 +1987,24 @@ int vmx_get_feature_msr(u32 msr, u64 *data) } } +static int zx_vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!is_zhaoxin_cpu()) + return KVM_MSR_RET_UNHANDLED; + + 
switch (msr_info->index) { + case MSR_PAUSEOPT_CONTROL: + if (!msr_info->host_initiated && !vmx_guest_pauseopt_enabled(vmx)) + return 1; + msr_info->data = vmx->msr_pauseopt_control; + return 0; + default: + return KVM_MSR_RET_UNHANDLED; /* Non-zhaoxin MSRs */ + } +} + /* * Reads an msr value (of 'msr_info->index') into 'msr_info->data'. * Returns 0 on success, non-0 otherwise. @@ -1998,6 +2015,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; u32 index; + int ret = 0; + + ret = zx_vmx_get_msr(vcpu, msr_info); + switch (ret) { + case 0: + case 1: + return ret; + case KVM_MSR_RET_UNHANDLED: + ret = 0; + break; + } switch (msr_info->index) { #ifdef CONFIG_X86_64 @@ -2021,11 +2049,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UMWAIT_CONTROL: if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) return 1; - msr_info->data = vmx->msr_ia32_umwait_control; - break; - case MSR_ZX_PAUSE_CONTROL: - if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx)) - return 1; + msr_info->data = vmx->msr_ia32_umwait_control; break; case MSR_IA32_SPEC_CTRL: @@ -2187,6 +2211,31 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) return !invalid; } +static int zx_vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 msr_index = msr_info->index; + u64 data = msr_info->data; + + if (!is_zhaoxin_cpu()) + return KVM_MSR_RET_UNHANDLED; + + switch (msr_index) { + case MSR_PAUSEOPT_CONTROL: + if (!msr_info->host_initiated && !vmx_guest_pauseopt_enabled(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_pauseopt_control = data; + return 0; + default: + return KVM_MSR_RET_UNHANDLED; /* Non-zhaoxin MSRs*/ + } +} + /* * Writes msr value into the appropriate 
"register". * Returns 0 on success, non-0 otherwise. @@ -2201,6 +2250,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 data = msr_info->data; u32 index; + ret = zx_vmx_set_msr(vcpu, msr_info); + switch (ret) { + case 0: + case 1: + return ret; + case KVM_MSR_RET_UNHANDLED: + ret = 0; + break; + } + switch (msr_index) { case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_info); @@ -2294,15 +2353,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* The reserved bit 1 and non-32 bit [63:32] should be zero */ if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) return 1; - vmx->msr_ia32_umwait_control = data; - break; - case MSR_ZX_PAUSE_CONTROL: - if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx)) - return 1; - /* The reserved bit 1 and non-32 bit [63:32] should be zero */ - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) - return 1; vmx->msr_ia32_umwait_control = data; break; case MSR_IA32_SPEC_CTRL: @@ -2599,6 +2650,44 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) return ctl_opt & allowed; } +static int setup_zhaoxin_vmcs_controls(struct vmcs_config *vmcs_conf) +{ + u32 zx_ext_vmcs_cap, msr_high, ign; + u32 zx_ctl3 = 0; + int ret; + + if (!is_zhaoxin_cpu()) + return 0; + + /* + * Zhaoxin uses MSR_ZX_EXT_VMCS_CAPS to enumerate the 3rd CPU-based + * control, rather than a bit in the 2nd CPU-based control. 
+ */ + rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign); + if (!(zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3_EN)) + return 0; + + ret = rdmsr_safe(MSR_ZX_VMX_PROCBASED_CTLS3, &ign, &msr_high); + if (msr_high & 0x1) { + /* ZX CPU with ZX_VMX_PROCBASED_CTLS3 support */ + ret = adjust_vmx_controls(KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL, + KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL, + MSR_ZX_VMX_PROCBASED_CTLS3, &zx_ctl3); + if (ret) + return -EIO; + } else { + /* ZX CPU without ZX_VMX_PROCBASED_CTLS3 support: + * assume PAUSEOPT is supported and set that bit + */ + zx_ctl3 |= ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; + } + + /* Will be extended in the future for more 3rd controls */ + vmcs_conf->zx_cpu_based_3rd_exec_ctrl = zx_ctl3; + + return 0; +} + static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { @@ -2727,6 +2816,9 @@ _vmexit_control &= ~x_ctrl; } + if (setup_zhaoxin_vmcs_controls(vmcs_conf)) + return -EIO; + rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ @@ -2758,10 +2850,6 @@ vmcs_conf->vmentry_ctrl = _vmentry_control; vmcs_conf->misc = misc_msr; - /* Setup Zhaoxin exec-cntl3 VMCS field. */ - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) - vmcs_conf->zx_cpu_based_3rd_exec_ctrl |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - #if IS_ENABLED(CONFIG_HYPERV) if (enlightened_vmcs) evmcs_sanitize_exec_ctrls(vmcs_conf); @@ -4562,24 +4650,22 @@ static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static u32 vmx_zx_tertiary_exec_control(struct vcpu_vmx *vmx) +/* + * We might need to modify the way the third level control corrections + * are handled here in the future by introducing a check using the + * CTLS3 MSR. 
The current hardware does not include the design for CTLS3, + * but the designer is attempting to add this MSR implementation + * through ucode. + */ +static u32 zx_vmx_tertiary_exec_control(struct vcpu_vmx *vmx) { struct kvm_vcpu *vcpu = &vmx->vcpu; u32 exec_control = vmcs_config.zx_cpu_based_3rd_exec_ctrl; - /* - * Show errors if Qemu wants to enable guest_zxpause while - * vmx not support it. - */ - if (guest_cpuid_has(vcpu, X86_FEATURE_ZXPAUSE)) { - if (!cpu_has_vmx_zxpause()) - pr_err("VMX not support guest_zxpause!\n"); - else - exec_control |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - } else - exec_control &= ~ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; + if (!guest_cpuid_has(vcpu, X86_FEATURE_PAUSEOPT)) + exec_control &= ~ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; - /* enable other features here */ + /* Adjust other features here */ return exec_control; } @@ -4770,6 +4856,25 @@ static int vmx_vcpu_precreate(struct kvm *kvm) #define VMX_XSS_EXIT_BITMAP 0 +static void zx_setup_3rd_ctrls(struct vcpu_vmx *vmx) +{ + if (cpu_has_zx_tertiary_exec_ctrls()) { + zx_tertiary_exec_controls_set(vmx, zx_vmx_tertiary_exec_control(vmx)); + /* + * Regardless of whether the guest has PAUSEOPT support or not, + * as long as there is a 3rd control, we need to initialize this + * field to 0 + */ + if (cpu_has_vmx_pauseopt()) + vmcs_write64(PAUSEOPT_TARGET_TSC, 0); + } +} + +static void zx_init_vmcs(struct vcpu_vmx *vmx) +{ + zx_setup_3rd_ctrls(vmx); +} + static void init_vmcs(struct vcpu_vmx *vmx) { struct kvm *kvm = vmx->vcpu.kvm; @@ -4791,11 +4896,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (cpu_has_secondary_exec_ctrls()) secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx)); - zx_vmexit_tsc_controls_set(vmx, 0); - } - if (cpu_has_tertiary_exec_ctrls()) tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx)); @@ -4891,6 +4991,7 @@ static void 
init_vmcs(struct vcpu_vmx *vmx) vmcs_write32(TPR_THRESHOLD, 0); } + zx_init_vmcs(vmx); vmx_setup_uret_msrs(vmx); } @@ -4932,6 +5033,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_pauseopt_control = 0; + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; vmx->msr_ia32_umwait_control = 0; @@ -6357,15 +6461,20 @@ void dump_vmcs(struct kvm_vcpu *vcpu) else tertiary_exec_control = 0; - pr_err("*** Zhaoxin Specific Fields ***\n"); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { + pr_err("VMCS %p, last attempted VM-entry on CPU %d\n", + vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu); + + if (cpu_has_zx_tertiary_exec_ctrls()) { + /* + * Now zhaoxin only supports specific vmcs fields on 3rd exec control, + * may extend in the future. + */ + pr_err("*** Zhaoxin Specific Fields ***\n"); pr_err("Zhaoxin TertiaryExec Cntl = 0x%016x\n", - vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL)); - pr_err("ZXPAUSE Saved TSC = 0x%016llx\n", vmcs_read64(ZXPAUSE_VMEXIT_TSC)); + vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL)); + pr_err("PAUSEOPT Saved TSC = 0x%016llx\n", vmcs_read64(PAUSEOPT_TARGET_TSC)); } - pr_err("VMCS %p, last attempted VM-entry on CPU %d\n", - vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu); pr_err("*** Guest State ***\n"); pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), @@ -7413,6 +7522,37 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_state_exit_irqoff(); } +static void zx_vmx_vcpu_run_pre(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long new_rip; + + if (vmx->pauseopt_in_progress) { + new_rip = kvm_rip_read(vcpu); + if (new_rip != vmx->pauseopt_rip) { + /* + * When the execution of PAUSEOPT in the guest is interrupted by + * other events, causing a vmexit, the pauseopt target tsc should be + * cleared to zero before the next vmentry if guest rip
changed, + * avoiding re-enter pauseopt optimized state after enter guest. + */ + vmcs_write64(PAUSEOPT_TARGET_TSC, 0); + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; + } + } +} + +static void zx_vmx_vcpu_run_post(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (cpu_has_vmx_pauseopt() && vmcs_read64(PAUSEOPT_TARGET_TSC)) { + vmx->pauseopt_in_progress = true; + vmx->pauseopt_rip = kvm_rip_read(vcpu); + } +} + static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) { bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT; @@ -7459,6 +7599,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + if (is_zhaoxin_cpu()) + zx_vmx_vcpu_run_pre(vcpu); if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); @@ -7564,6 +7706,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) return EXIT_FASTPATH_NONE; vmx->loaded_vmcs->launched = 1; + if (is_zhaoxin_cpu()) + zx_vmx_vcpu_run_post(vcpu); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); @@ -7919,10 +8063,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmcs_set_secondary_exec_control(vmx, vmx_secondary_exec_control(vmx)); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx)); - zx_vmexit_tsc_controls_set(vmx, 0); - } + zx_setup_3rd_ctrls(vmx); if (guest_can_use(vcpu, X86_FEATURE_VMX)) vmx->msr_ia32_feature_control_valid_bits |= @@ -8034,6 +8175,12 @@ static __init u64 vmx_get_perf_capabilities(void) return perf_cap; } +static void zx_vmx_set_cpu_caps(void) +{ + if (cpu_has_vmx_pauseopt()) + kvm_cpu_cap_check_and_set(X86_FEATURE_PAUSEOPT); +} + static __init void vmx_set_cpu_caps(void) { kvm_set_cpu_caps(); @@ -8083,9 +8230,7 @@ static __init void vmx_set_cpu_caps(void) if 
(cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); - if (cpu_has_vmx_zxpause()) - kvm_cpu_cap_check_and_set(X86_FEATURE_ZXPAUSE); - + zx_vmx_set_cpu_caps(); } static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, @@ -8652,10 +8797,6 @@ static __init int hardware_setup(void) unsigned long host_bndcfgs; struct desc_ptr dt; int r; - u32 ign; - - /* Caches Zhaoxin extend VMCS capabilities. */ - rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign); store_idt(&dt); host_idt_base = dt.address; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7c997b07c85d4a7b27c1b491cd27bd64fc9ce2cf..2404c7957d7b39e8ab49b3852432f578e2cfd678 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -279,6 +279,9 @@ struct vcpu_vmx { u64 spec_ctrl; u32 msr_ia32_umwait_control; + u32 msr_pauseopt_control; + bool pauseopt_in_progress; + unsigned long pauseopt_rip; /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a @@ -603,15 +606,7 @@ static inline u8 vmx_get_rvi(void) (TERTIARY_EXEC_IPI_VIRT) #define KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL 0 -#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL \ - (ZX_TERTIARY_EXEC_GUEST_ZXPAUSE) - -/* - * We shouldn't rw zxpause_vmexit_tsc vmcs field in this - * way, try to use another way in the future. 
- */ -#define KVM_REQUIRED_VMX_ZXPAUSE_VMEXIT_TSC 0 -#define KVM_OPTIONAL_VMX_ZXPAUSE_VMEXIT_TSC 1 +#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL (ZX_TERTIARY_EXEC_GUEST_PAUSEOPT) #define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ @@ -646,7 +641,6 @@ BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) BUILD_CONTROLS_SHADOW(zx_tertiary_exec, ZX_TERTIARY_VM_EXEC_CONTROL, 32) -BUILD_CONTROLS_SHADOW(zx_vmexit_tsc, ZXPAUSE_VMEXIT_TSC, 64) /* * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the @@ -749,10 +743,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } -static inline bool vmx_guest_zxpause_enabled(struct vcpu_vmx *vmx) +static inline bool vmx_guest_pauseopt_enabled(struct vcpu_vmx *vmx) { return zx_tertiary_exec_controls_get(vmx) & - ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; + ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; } static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) @@ -798,4 +792,12 @@ static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu) to_vmx(vcpu)->nested.enlightened_vmcs_enabled; } +static inline bool is_zhaoxin_cpu(void) +{ + /* Now zhaoxin owns 2 x86 vendor brands, Zhaoxin and Centaur */ + return (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR); +} + +#define KVM_MSR_RET_UNHANDLED 2 #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21855275e86700bc160dcdb0122a0a85c5547cad..ef8a9684500dd27b0f1cc45a09ae6a9aeac4a3d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -345,7 +345,7 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, - 
MSR_ZX_PAUSE_CONTROL, + MSR_PAUSEOPT_CONTROL, MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; @@ -7257,8 +7257,8 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG)) return; break; - case MSR_ZX_PAUSE_CONTROL: - if (!kvm_cpu_cap_has(X86_FEATURE_ZXPAUSE)) + case MSR_PAUSEOPT_CONTROL: + if (!kvm_cpu_cap_has(X86_FEATURE_PAUSEOPT)) return; break; case MSR_IA32_RTIT_CTL: diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 3946badbd78fd7058a871b3d6779766a8f0c606d..96bf5b3baacd399f69d662fabe140b7cf739bfc6 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -118,12 +118,12 @@ static void delay_halt_tpause(u64 start, u64 cycles) } /* - * On ZHAOXIN the ZXPAUSE instruction waits until any of: + * On ZHAOXIN the PAUSEOPT instruction waits until any of: * 1) the delta of TSC counter exceeds the value provided in EDX:EAX - * 2) global timeout in ZX_PAUSE_CONTROL is exceeded + * 2) global timeout in PAUSEOPT_CONTROL is exceeded * 3) an external interrupt occurs */ -static void delay_halt_zxpause(u64 unused, u64 cycles) +static void delay_halt_pauseopt(u64 unused, u64 cycles) { u64 until = cycles; u32 eax, edx; @@ -131,11 +131,7 @@ static void delay_halt_zxpause(u64 unused, u64 cycles) eax = lower_32_bits(until); edx = upper_32_bits(until); - /* - * Hard code the deeper (C0.1) sleep state because exit latency is - * small compared to the "microseconds" that usleep() will delay. 
- */ - __zxpause(ZXPAUSE_C01_STATE, edx, eax); + __pauseopt(PAUSEOPT_P01_STATE, edx, eax); } /* @@ -204,9 +200,9 @@ void __init use_tpause_delay(void) delay_fn = delay_halt; } -void __init use_zxpause_delay(void) +void __init use_pauseopt_delay(void) { - delay_halt_fn = delay_halt_zxpause; + delay_halt_fn = delay_halt_pauseopt; delay_fn = delay_halt; } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b8e6840fb997867dac6b6fc57834483837bd116d..cd4af761cddef5cbd7604aeebb89f5c9265d066d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -496,8 +496,8 @@ #define X86_FEATURE_HYGON_SM3 (29*32 + 1) /* "sm3" SM3 instructions */ #define X86_FEATURE_HYGON_SM4 (29*32 + 2) /* "sm4" SM4 instructions */ -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 21 */ -#define X86_FEATURE_ZXPAUSE (30*32 + 0) /* ZHAOXIN ZXPAUSE */ +/* Zhaoxin/Centaur-defined CPU features, CPUID level 0xC0000006, word 30 */ +#define X86_FEATURE_PAUSEOPT (30*32+ 0) /* ZHAOXIN PAUSEOPT */ #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 38d71379a7811eec24792b9d064a883a13105aa5..eb2157f552b60708e70cd4f1a9ed9b6cfc6176d8 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -86,11 +86,13 @@ */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL 0x187f + /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. 
*/ -#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL_TIME_MASK (~0x03U) /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf @@ -835,13 +837,6 @@ #define MSR_VIA_RNG 0x0000110b #define MSR_VIA_BCR2 0x00001147 -/* - * Zhaoxin extend VMCS capabilities: - * bit 0: exec-cntl3 VMCS field. - */ -#define MSR_ZX_EXT_VMCS_CAPS 0x1675 -#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0) - /* Transmeta defined MSRs */ #define MSR_TMTA_LONGRUN_CTRL 0x80868010 #define MSR_TMTA_LONGRUN_FLAGS 0x80868011 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1bf614a997e120bb364d8c695ab94a..afab76af3e43b79e64843d503b341cb9b880b83f 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_PAUSEOPT 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75