Commit 1e02ce4c authored by Andy Lutomirski, committed by Ingo Molnar
Browse files

x86: Store a per-cpu shadow copy of CR4



Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.

To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm.  The heaviest
users of the cr4 shadow will be switch_mm and __switch_to_xtra, and
__switch_to_xtra is called shortly after switch_mm during context
switch, so the cacheline is likely to be hot.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vince Weaver <vince@deater.net>
Cc: "hillf.zj" <hillf.zj@alibaba-inc.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net

Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 375074cc
...@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x) ...@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x); PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
} }
static inline unsigned long read_cr4(void) static inline unsigned long __read_cr4(void)
{ {
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
} }
static inline unsigned long read_cr4_safe(void) static inline unsigned long __read_cr4_safe(void)
{ {
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
} }
static inline void write_cr4(unsigned long x) static inline void __write_cr4(unsigned long x)
{ {
PVOP_VCALL1(pv_cpu_ops.write_cr4, x); PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
} }
......
...@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x) ...@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x); native_write_cr3(x);
} }
static inline unsigned long read_cr4(void) static inline unsigned long __read_cr4(void)
{ {
return native_read_cr4(); return native_read_cr4();
} }
static inline unsigned long read_cr4_safe(void) static inline unsigned long __read_cr4_safe(void)
{ {
return native_read_cr4_safe(); return native_read_cr4_safe();
} }
static inline void write_cr4(unsigned long x) static inline void __write_cr4(unsigned long x)
{ {
native_write_cr4(x); native_write_cr4(x);
} }
......
...@@ -15,14 +15,37 @@ ...@@ -15,14 +15,37 @@
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif #endif
struct tlb_state {
#ifdef CONFIG_SMP
struct mm_struct *active_mm;
int state;
#endif
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
unsigned long cr4;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}
/* Set in this cpu's CR4. */ /* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask) static inline void cr4_set_bits(unsigned long mask)
{ {
unsigned long cr4; unsigned long cr4;
cr4 = read_cr4(); cr4 = this_cpu_read(cpu_tlbstate.cr4);
cr4 |= mask; if ((cr4 | mask) != cr4) {
write_cr4(cr4); cr4 |= mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
} }
/* Clear in this cpu's CR4. */ /* Clear in this cpu's CR4. */
...@@ -30,9 +53,18 @@ static inline void cr4_clear_bits(unsigned long mask) ...@@ -30,9 +53,18 @@ static inline void cr4_clear_bits(unsigned long mask)
{ {
unsigned long cr4; unsigned long cr4;
cr4 = read_cr4(); cr4 = this_cpu_read(cpu_tlbstate.cr4);
cr4 &= ~mask; if ((cr4 & ~mask) != cr4) {
write_cr4(cr4); cr4 &= ~mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
}
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
return this_cpu_read(cpu_tlbstate.cr4);
} }
/* /*
...@@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void) ...@@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
{ {
unsigned long cr4; unsigned long cr4;
cr4 = native_read_cr4(); cr4 = this_cpu_read(cpu_tlbstate.cr4);
/* clear PGE */ /* clear PGE */
native_write_cr4(cr4 & ~X86_CR4_PGE); native_write_cr4(cr4 & ~X86_CR4_PGE);
/* write old PGE again and flush TLBs */ /* write old PGE again and flush TLBs */
...@@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, ...@@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
#define TLBSTATE_OK 1 #define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2 #define TLBSTATE_LAZY 2
struct tlb_state {
struct mm_struct *active_mm;
int state;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
static inline void reset_lazy_tlbstate(void) static inline void reset_lazy_tlbstate(void)
{ {
this_cpu_write(cpu_tlbstate.state, 0); this_cpu_write(cpu_tlbstate.state, 0);
......
...@@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void) ...@@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void)
static inline int cpu_vmx_enabled(void) static inline int cpu_vmx_enabled(void)
{ {
return read_cr4() & X86_CR4_VMXE; return __read_cr4() & X86_CR4_VMXE;
} }
/** Disable VMX if it is enabled on the current CPU /** Disable VMX if it is enabled on the current CPU
......
...@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void) ...@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)
header->pmode_cr0 = read_cr0(); header->pmode_cr0 = read_cr0();
if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
header->pmode_cr4 = read_cr4(); header->pmode_cr4 = __read_cr4();
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
} }
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <asm/archrandom.h> #include <asm/archrandom.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/debugreg.h> #include <asm/debugreg.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/vsyscall.h> #include <asm/vsyscall.h>
...@@ -1293,6 +1294,12 @@ void cpu_init(void) ...@@ -1293,6 +1294,12 @@ void cpu_init(void)
wait_for_master_cpu(cpu); wait_for_master_cpu(cpu);
/*
* Initialize the CR4 shadow before doing anything that could
* try to read it.
*/
cr4_init_shadow();
/* /*
* Load microcode on this cpu if a valid microcode is available. * Load microcode on this cpu if a valid microcode is available.
* This is early microcode loading procedure. * This is early microcode loading procedure.
......
...@@ -138,8 +138,8 @@ static void prepare_set(void) ...@@ -138,8 +138,8 @@ static void prepare_set(void)
/* Save value of CR4 and clear Page Global Enable (bit 7) */ /* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) { if (cpu_has_pge) {
cr4 = read_cr4(); cr4 = __read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE); __write_cr4(cr4 & ~X86_CR4_PGE);
} }
/* /*
...@@ -171,7 +171,7 @@ static void post_set(void) ...@@ -171,7 +171,7 @@ static void post_set(void)
/* Restore value of CR4 */ /* Restore value of CR4 */
if (cpu_has_pge) if (cpu_has_pge)
write_cr4(cr4); __write_cr4(cr4);
} }
static void cyrix_set_arr(unsigned int reg, unsigned long base, static void cyrix_set_arr(unsigned int reg, unsigned long base,
......
...@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock) ...@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Save value of CR4 and clear Page Global Enable (bit 7) */ /* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) { if (cpu_has_pge) {
cr4 = read_cr4(); cr4 = __read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE); __write_cr4(cr4 & ~X86_CR4_PGE);
} }
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
...@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock) ...@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock)
/* Restore value of CR4 */ /* Restore value of CR4 */
if (cpu_has_pge) if (cpu_has_pge)
write_cr4(cr4); __write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock); raw_spin_unlock(&set_atomicity_lock);
} }
......
...@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void) ...@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void)
asmlinkage __visible void __init i386_start_kernel(void) asmlinkage __visible void __init i386_start_kernel(void)
{ {
cr4_init_shadow();
sanitize_boot_params(&boot_params); sanitize_boot_params(&boot_params);
/* Call the subarch specific early setup function */ /* Call the subarch specific early setup function */
......
...@@ -155,6 +155,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) ...@@ -155,6 +155,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
(__START_KERNEL & PGDIR_MASK))); (__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
cr4_init_shadow();
/* Kill off the identity-map trampoline */ /* Kill off the identity-map trampoline */
reset_early_page_tables(); reset_early_page_tables();
......
...@@ -101,7 +101,7 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -101,7 +101,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0(); cr0 = read_cr0();
cr2 = read_cr2(); cr2 = read_cr2();
cr3 = read_cr3(); cr3 = read_cr3();
cr4 = read_cr4_safe(); cr4 = __read_cr4_safe();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4); cr0, cr2, cr3, cr4);
......
...@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0(); cr0 = read_cr0();
cr2 = read_cr2(); cr2 = read_cr2();
cr3 = read_cr3(); cr3 = read_cr3();
cr4 = read_cr4(); cr4 = __read_cr4();
printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
fs, fsindex, gs, gsindex, shadowgs); fs, fsindex, gs, gsindex, shadowgs);
......
...@@ -1178,7 +1178,7 @@ void __init setup_arch(char **cmdline_p) ...@@ -1178,7 +1178,7 @@ void __init setup_arch(char **cmdline_p)
if (boot_cpu_data.cpuid_level >= 0) { if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */ /* A CPU has %cr4 if and only if it has CPUID */
mmu_cr4_features = read_cr4(); mmu_cr4_features = __read_cr4();
if (trampoline_cr4_features) if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features; *trampoline_cr4_features = mmu_cr4_features;
} }
......
...@@ -1583,7 +1583,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ...@@ -1583,7 +1583,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{ {
unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
if (cr4 & X86_CR4_VMXE) if (cr4 & X86_CR4_VMXE)
......
...@@ -2785,7 +2785,7 @@ static int hardware_enable(void) ...@@ -2785,7 +2785,7 @@ static int hardware_enable(void)
u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
u64 old, test_bits; u64 old, test_bits;
if (read_cr4() & X86_CR4_VMXE) if (cr4_read_shadow() & X86_CR4_VMXE)
return -EBUSY; return -EBUSY;
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
...@@ -4255,7 +4255,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) ...@@ -4255,7 +4255,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
/* Save the most likely value for this task's CR4 in the VMCS. */ /* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = read_cr4(); cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
vmx->host_state.vmcs_host_cr4 = cr4; vmx->host_state.vmcs_host_cr4 = cr4;
...@@ -7784,7 +7784,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -7784,7 +7784,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr4 = read_cr4(); cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4); vmcs_writel(HOST_CR4, cr4);
vmx->host_state.vmcs_host_cr4 = cr4; vmx->host_state.vmcs_host_cr4 = cr4;
......
...@@ -600,7 +600,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, ...@@ -600,7 +600,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(nx_warning, from_kuid(&init_user_ns, current_uid())); printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
if (pte && pte_present(*pte) && pte_exec(*pte) && if (pte && pte_present(*pte) && pte_exec(*pte) &&
(pgd_flags(*pgd) & _PAGE_USER) && (pgd_flags(*pgd) & _PAGE_USER) &&
(read_cr4() & X86_CR4_SMEP)) (__read_cr4() & X86_CR4_SMEP))
printk(smep_warning, from_kuid(&init_user_ns, current_uid())); printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
} }
......
...@@ -713,6 +713,15 @@ void __init zone_sizes_init(void) ...@@ -713,6 +713,15 @@ void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns); free_area_init_nodes(max_zone_pfns);
} }
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
#ifdef CONFIG_SMP
.active_mm = &init_mm,
.state = 0,
#endif
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{ {
/* entry 0 MUST be WB (hardwired to speed up translations) */ /* entry 0 MUST be WB (hardwired to speed up translations) */
......
...@@ -14,9 +14,6 @@ ...@@ -14,9 +14,6 @@
#include <asm/uv/uv.h> #include <asm/uv/uv.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
/* /*
* Smarter SMP flushing macros. * Smarter SMP flushing macros.
* c/o Linus Torvalds. * c/o Linus Torvalds.
......
...@@ -105,11 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt) ...@@ -105,11 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr0 = read_cr0(); ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2(); ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3(); ctxt->cr3 = read_cr3();
#ifdef CONFIG_X86_32 ctxt->cr4 = __read_cr4_safe();
ctxt->cr4 = read_cr4_safe(); #ifdef CONFIG_X86_64
#else
/* CONFIG_X86_64 */
ctxt->cr4 = read_cr4();
ctxt->cr8 = read_cr8(); ctxt->cr8 = read_cr8();
#endif #endif
ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
...@@ -175,12 +172,12 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) ...@@ -175,12 +172,12 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
/* cr4 was introduced in the Pentium CPU */ /* cr4 was introduced in the Pentium CPU */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
if (ctxt->cr4) if (ctxt->cr4)
write_cr4(ctxt->cr4); __write_cr4(ctxt->cr4);
#else #else
/* CONFIG X86_64 */ /* CONFIG X86_64 */
wrmsrl(MSR_EFER, ctxt->efer); wrmsrl(MSR_EFER, ctxt->efer);
write_cr8(ctxt->cr8); write_cr8(ctxt->cr8);
write_cr4(ctxt->cr4); __write_cr4(ctxt->cr4);
#endif #endif
write_cr3(ctxt->cr3); write_cr3(ctxt->cr3);
write_cr2(ctxt->cr2); write_cr2(ctxt->cr2);
......
...@@ -81,7 +81,7 @@ void __init setup_real_mode(void) ...@@ -81,7 +81,7 @@ void __init setup_real_mode(void)
trampoline_header->start = (u64) secondary_startup_64; trampoline_header->start = (u64) secondary_startup_64;
trampoline_cr4_features = &trampoline_header->cr4; trampoline_cr4_features = &trampoline_header->cr4;
*trampoline_cr4_features = read_cr4(); *trampoline_cr4_features = __read_cr4();
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment