diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 83dc7d6a0294428af9051d2441c05307eb4e469e..69b3a7c3001397f65f9cfde541b042347827ee98 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
 			   hyperv.o page_track.o debugfs.o
 
-kvm-intel-y		+= vmx/vmx.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
+kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
 kvm-amd-y		+= svm.o pmu_amd.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index da16f022a52925c555eb98616ca358e013cdd97e..abbad93ed26a27bd32fb1a008c64acb18c06e67a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,8 +19,6 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested_early_check = 0;
 module_param(nested_early_check, bool, S_IRUGO);
 
-extern const ulong vmx_early_consistency_check_return;
-
 /*
  * Hyper-V requires all of these, so mark them as supported even though
  * they are just treated the same as all-context.
@@ -2715,7 +2713,7 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
@@ -2740,8 +2738,6 @@ static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	 */
 	vmcs_writel(GUEST_RFLAGS, 0);
 
-	vmcs_writel(HOST_RIP, vmx_early_consistency_check_return);
-
 	cr3 = __get_current_cr3_fast();
 	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
 		vmcs_writel(HOST_CR3, cr3);
@@ -2758,33 +2754,32 @@ static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 
 	asm(
 		/* Set HOST_RSP */
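+		/*
+		 * Account for the return RIP that "call vmx_vmenter" will
+		 * push, so that the VMWRITE below records the RSP value
+		 * that will be live at the time of VM-Enter.
+		 */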
+		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
 		__ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
-		"mov %%" _ASM_SP ", %c[host_rsp](%% " _ASM_CX")\n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%1)\n\t"
+		"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
 
 		/* Check if vmlaunch or vmresume is needed */
 		"cmpl $0, %c[launched](%% " _ASM_CX")\n\t"
-		"jne 1f\n\t"
-		__ex("vmlaunch") "\n\t"
-		"jmp 2f\n\t"
-		"1: " __ex("vmresume") "\n\t"
-		"2: "
+
+		"call vmx_vmenter\n\t"
+
 		/* Set vmx->fail accordingly */
 		"setbe %c[fail](%% " _ASM_CX")\n\t"
-
-		".pushsection .rodata\n\t"
-		".global vmx_early_consistency_check_return\n\t"
-		"vmx_early_consistency_check_return: " _ASM_PTR " 2b\n\t"
-		".popsection"
-	      :
+	      : ASM_CALL_CONSTRAINT
 	      : "c"(vmx), "d"((unsigned long)HOST_RSP),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
-		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp))
+		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
+		[wordsize]"i"(sizeof(ulong))
 	      : "rax", "cc", "memory"
 	);
 
-	vmcs_writel(HOST_RIP, vmx_return);
-
 	preempt_enable();
 
 	if (vmx->msr_autoload.host.nr)
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
new file mode 100644
index 0000000000000000000000000000000000000000..bcef2c7e9bc48cf658c83dab5a95d45887ac92af
--- /dev/null
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+	.text
+
+/**
+ * vmx_vmenter - VM-Enter the currently loaded VMCS
+ *
+ * %RFLAGS.ZF:	!VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
+ *
+ * Returns:
+ *	%RFLAGS.CF is set on VM-Fail Invalid
+ *	%RFLAGS.ZF is set on VM-Fail Valid
+ *	%RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+ *
+ * Note that VMRESUME/VMLAUNCH fall through and return directly if
+ * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+ * to vmx_vmexit.
+ */
+ENTRY(vmx_vmenter)
+	/* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+	je 2f
+
+1:	vmresume
+	ret
+
+2:	vmlaunch
+	ret
+
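+	/*
+	 * VMLAUNCH/VMRESUME can fault, e.g. if VMX has been disabled by an
+	 * emergency reboot on a different CPU.  The fixup entries at the
+	 * bottom route such a fault here: eat it if a reboot is in
+	 * progress, otherwise report it via kvm_spurious_fault().
+	 */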
+3:	cmpb $0, kvm_rebooting
+	jne 4f
+	call kvm_spurious_fault
+4:	ret
+
+	.pushsection .fixup, "ax"
+5:	jmp 3b
+	.popsection
+
+	_ASM_EXTABLE(1b, 5b)
+	_ASM_EXTABLE(2b, 5b)
+
+ENDPROC(vmx_vmenter)
+
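+/*
+ * A sketch of the expected calling sequence, mirroring the inline asm in
+ * vmx_vcpu_run() and nested_vmx_check_vmentry_hw() (operand names are
+ * illustrative only):
+ *
+ *	cmpl $0, launched(%rcx)		# RFLAGS.ZF = !vmx->__launched
+ *	call vmx_vmenter		# VMLAUNCH if ZF is set, else VMRESUME
+ *	setbe fail(%rcx)		# CF or ZF set, i.e. VM-Fail
+ */
+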
+/**
+ * vmx_vmexit - Handle a VMX VM-Exit
+ *
+ * Returns:
+ *	%RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+ *
+ * This is vmx_vmenter's partner in crime.  On a VM-Exit, control will jump
+ * here after hardware loads the host's state, i.e. this is the destination
+ * referred to by VMCS.HOST_RIP.
+ */
+ENTRY(vmx_vmexit)
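+	/*
+	 * The return RIP pushed by the caller's CALL to vmx_vmenter is
+	 * at [HOST_RSP], i.e. a bare RET resumes the caller immediately
+	 * after its CALL.
+	 */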
+	ret
+ENDPROC(vmx_vmexit)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e82319fbdceba8edd137f66b217fd538bb5dcfea..41d6f7081ff76f3894646d027ad4d84d25a87f6a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -336,6 +336,8 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 							  u32 msr, int type);
 
+void vmx_vmexit(void);
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
@@ -3773,7 +3775,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 	vmx->host_idt_base = dt.address;
 
-	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -6360,7 +6362,7 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
 	vmx->loaded_vmcs->hv_timer_armed = false;
 }
 
-static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4, evmcs_rsp;
@@ -6440,6 +6442,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
 		"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
 		"push %%" _ASM_CX " \n\t"
+		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
 		"cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
 		"je 1f \n\t"
 		"mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
@@ -6451,6 +6454,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"2: \n\t"
 		__ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
 		"1: \n\t"
+		"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
+
 		/* Reload cr2 if changed */
 		"mov %c[cr2](%%" _ASM_CX "), %%" _ASM_AX " \n\t"
 		"mov %%cr2, %%" _ASM_DX " \n\t"
@@ -6481,11 +6486,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[rcx](%%" _ASM_CX "), %%" _ASM_CX " \n\t"
 
 		/* Enter guest mode */
-		"jne 1f \n\t"
-		__ex("vmlaunch") "\n\t"
-		"jmp 2f \n\t"
-		"1: " __ex("vmresume") "\n\t"
-		"2: "
+		"call vmx_vmenter\n\t"
 
 		/* Save guest's RCX to the stack placeholder (see above) */
 		"mov %%" _ASM_CX ", %c[wordsize](%%" _ASM_SP ") \n\t"
@@ -6534,11 +6535,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"xor %%esi, %%esi \n\t"
 		"xor %%edi, %%edi \n\t"
 		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
-		".pushsection .rodata \n\t"
-		".global vmx_return \n\t"
-		"vmx_return: " _ASM_PTR " 2b \n\t"
-		".popsection"
-	      : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
+	      : ASM_CALL_CONSTRAINT
+	      : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
 		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 20172c11d5f8fc084750dfefc441051a694be13d..99328954c2fc1c43d8573f727b0e9219a2828065 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -12,7 +12,6 @@
 #include "vmcs.h"
 
 extern const u32 vmx_msr_index[];
-extern const ulong vmx_return;
 extern u64 host_efer;
 
 #define MSR_TYPE_R	1