/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_host.h>

#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
#include <linux/psp-sev.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/apic.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id svm_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_SVM),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

#define SVM_FEATURE_NPT            (1 <<  0)
#define SVM_FEATURE_LBRV           (1 <<  1)
#define SVM_FEATURE_SVML           (1 <<  2)
#define SVM_FEATURE_NRIP           (1 <<  3)
#define SVM_FEATURE_TSC_RATE       (1 <<  4)
#define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
#define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
#define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
#define SVM_FEATURE_PAUSE_FILTER   (1 << 10)

#define SVM_AVIC_DOORBELL	0xc001011b

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD          0xffffff0000000000ULL
#define TSC_RATIO_MIN		0x0000000000000001ULL
#define TSC_RATIO_MAX		0x000000ffffffffffULL

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

/*
 * 0xff is broadcast, so the max index allowed for physical APIC ID
 * table is 0xfe.  APIC IDs above 0xff are reserved.
 */
#define AVIC_MAX_PHYSICAL_ID_COUNT	255

#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
						(y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
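/*
 * For example (illustrative values): AVIC_GATAG(0x12345, 0x07) packs the
 * VM ID into bits 31:8 and the vCPU ID into bits 7:0, giving 0x1234507;
 * AVIC_GATAG_TO_VMID(0x1234507) yields 0x12345 and
 * AVIC_GATAG_TO_VCPUID(0x1234507) yields 0x07.
 */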

static bool erratum_383_found __read_mostly;

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_TSC_AUX,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_sev_info {
	bool active;		/* SEV enabled guest */
	unsigned int asid;	/* ASID used for this guest */
	unsigned int handle;	/* SEV firmware handle */
	int fd;			/* SEV device fd */
	unsigned long pages_locked; /* Number of pages locked */
	struct list_head regions_list;  /* List of registered regions */
};

struct kvm_svm {
	struct kvm kvm;

	/* Struct members for AVIC */
	u32 avic_vm_id;
	struct page *avic_logical_id_table_page;
	struct page *avic_physical_id_table_page;
	struct hlist_node hnode;

	struct kvm_sev_info sev_info;
};

struct kvm_vcpu;

struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	/* Nested Paging related state */
	u64 nested_cr3;
};

#define MSRPM_OFFSETS	16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

/*
 * Set osvw_len to higher value when updated Revision Guides
 * are published and we know what the new status bits are
 */
static uint64_t osvw_len = 4, osvw_status;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t tsc_aux;

	u64 msr_decfg;

	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u64 spec_ctrl;
	/*
	 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
	 * translated into the appropriate L2_CFG bits on the host to
	 * perform speculative control.
	 */
	u64 virt_spec_ctrl;

	u32 *msrpm;

	ulong nmi_iret_rip;

	struct nested_state nested;

	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	unsigned int3_injected;
	unsigned long int3_rip;

	/* cached guest cpuid flags for faster access */
	bool nrips_enabled	: 1;

	u32 ldr_reg;
	u32 dfr_reg;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	bool avic_is_running;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when updating the vcpu affinity. This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;

	/* which host CPU was used for running this vcpu */
	unsigned int last_cpu;
};

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};

#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT			31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)

#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	(0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)
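/*
 * As encoded by the masks above, a physical APIC ID table entry holds the
 * host physical APIC ID in bits 7:0, the backing page address in bits 51:12,
 * the IsRunning flag in bit 62 and the Valid flag in bit 63.
 */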

static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT	0x0100000000ULL

#define MSR_INVALID			0xffffffffU

static const struct svm_direct_access_msrs {
	u32 index;   /* Index of the MSR */
	bool always; /* True if intercept is always on */
} direct_access_msrs[] = {
	{ .index = MSR_STAR,				.always = true  },
	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
#ifdef CONFIG_X86_64
	{ .index = MSR_GS_BASE,				.always = true  },
	{ .index = MSR_FS_BASE,				.always = true  },
	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
	{ .index = MSR_LSTAR,				.always = true  },
	{ .index = MSR_CSTAR,				.always = true  },
	{ .index = MSR_SYSCALL_MASK,			.always = true  },
#endif
	{ .index = MSR_IA32_SPEC_CTRL,			.always = false },
	{ .index = MSR_IA32_PRED_CMD,			.always = false },
	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
	{ .index = MSR_INVALID,				.always = false },
};

/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif

/*
 * These 2 parameters are used to configure the controls for Pause-Loop Exiting:
 * pause_filter_count: On processors that support Pause filtering (indicated
 *	by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
 *	count value. On VMRUN this value is loaded into an internal counter.
 *	Each time a pause instruction is executed, this counter is decremented
 *	until it reaches zero at which time a #VMEXIT is generated if pause
 *	intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
 *	Intercept Filtering for more details.
 *	This also indicates whether PLE logic is enabled.
 *
 * pause_filter_thresh: In addition, some processor families support advanced
 *	pause filtering (indicated by CPUID Fn8000_000A_EDX) with an upper
 *	bound on the amount of time a guest is allowed to execute in a pause
 *	loop.
 *	In this mode, a 16-bit pause filter threshold field is added in the
 *	VMCB. The threshold value is a cycle count that is used to reset the
 *	pause counter. As with simple pause filtering, VMRUN loads the pause
 *	count value from VMCB into an internal counter. Then, on each pause
 *	instruction the hardware checks the elapsed number of cycles since
 *	the most recent pause instruction against the pause filter threshold.
 *	If the elapsed cycle count is greater than the pause filter threshold,
 *	then the internal pause count is reloaded from the VMCB and execution
 *	continues. If the elapsed cycle count is less than the pause filter
 *	threshold, then the internal pause count is decremented. If the count
 *	value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
 *	triggered. If advanced pause filtering is supported and pause filter
 *	threshold field is set to zero, the filter will operate in the simpler,
 *	count only mode.
 */

static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
module_param(pause_filter_thresh, ushort, 0444);

static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
module_param(pause_filter_count, ushort, 0444);

/* Default doubles per-vcpu window every exit. */
static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(pause_filter_count_grow, ushort, 0444);

/* Default resets per-vcpu window every exit to pause_filter_count. */
static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);

/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
module_param(npt, int, S_IRUGO);

/* allow nested virtualization in KVM/SVM */
static int nested = true;
module_param(nested, int, S_IRUGO);

/* enable / disable AVIC */
static int avic;
#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
#endif

/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);

/* enable/disable Virtual GIF */
static int vgif = true;
module_param(vgif, int, 0444);

/* enable/disable SEV support */
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);

static u8 rsm_ins_bytes[] = "\x0f\xaa";

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);

static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code);

enum {
	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
			    pause filter count */
	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
	VMCB_ASID,	 /* ASID */
	VMCB_INTR,	 /* int_ctl, int_vector */
	VMCB_NPT,        /* npt_en, nCR3, gPAT */
	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
	VMCB_DR,         /* DR6, DR7 */
	VMCB_DT,         /* GDT, IDT */
	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
	VMCB_CR2,        /* CR2 only */
	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_AVIC,       /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
			  * AVIC PHYSICAL_TABLE pointer,
			  * AVIC LOGICAL_TABLE pointer
			  */
	VMCB_DIRTY_MAX,
};

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))

#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL

static unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};


static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_svm, kvm);
}

static inline bool svm_sev_enabled(void)
{
	return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
}

static inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->active;
#else
	return false;
#endif
}

static inline int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}

static inline void mark_all_dirty(struct vmcb *vmcb)
{
	vmcb->control.clean = 0;
}

static inline void mark_all_clean(struct vmcb *vmcb)
{
	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
			       & ~VMCB_ALWAYS_DIRTY_MASK;
}

static inline void mark_dirty(struct vmcb *vmcb, int bit)
{
	vmcb->control.clean &= ~(1 << bit);
}
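/*
 * The helpers above maintain the VMCB clean field: a set bit tells hardware
 * that the corresponding register group is unchanged and may be reused from
 * its VMCB state cache, so any code that modifies such a group must clear
 * the bit, e.g. mark_dirty(svm->vmcb, VMCB_INTR) after updating int_ctl.
 */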

static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_svm, vcpu);
}

static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
	mark_dirty(svm->vmcb, VMCB_AVIC);
}

static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 *entry = svm->avic_physical_id_cache;

	if (!entry)
		return false;

	return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
}

static void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr | g->intercept_cr;
	c->intercept_dr = h->intercept_dr | g->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
	c->intercept = h->intercept | g->intercept;
}

static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
{
	if (is_guest_mode(&svm->vcpu))
		return svm->nested.hsave;
	else
		return svm->vmcb;
}

static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr |= (1U << bit);

	recalc_intercepts(svm);
}

static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr &= ~(1U << bit);

	recalc_intercepts(svm);
}

static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	return vmcb->control.intercept_cr & (1U << bit);
}

static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
		| (1 << INTERCEPT_DR1_READ)
		| (1 << INTERCEPT_DR2_READ)
		| (1 << INTERCEPT_DR3_READ)
		| (1 << INTERCEPT_DR4_READ)
		| (1 << INTERCEPT_DR5_READ)
		| (1 << INTERCEPT_DR6_READ)
		| (1 << INTERCEPT_DR7_READ)
		| (1 << INTERCEPT_DR0_WRITE)
		| (1 << INTERCEPT_DR1_WRITE)
		| (1 << INTERCEPT_DR2_WRITE)
		| (1 << INTERCEPT_DR3_WRITE)
		| (1 << INTERCEPT_DR4_WRITE)
		| (1 << INTERCEPT_DR5_WRITE)
		| (1 << INTERCEPT_DR6_WRITE)
		| (1 << INTERCEPT_DR7_WRITE);

	recalc_intercepts(svm);
}

static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = 0;

	recalc_intercepts(svm);
}

static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions |= (1U << bit);

	recalc_intercepts(svm);
}

static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions &= ~(1U << bit);

	recalc_intercepts(svm);
}

static inline void set_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept |= (1ULL << bit);

	recalc_intercepts(svm);
}

static inline void clr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept &= ~(1ULL << bit);

	recalc_intercepts(svm);
}

static inline bool vgif_enabled(struct vcpu_svm *svm)
{
	return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
}

static inline void enable_gif(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		svm->vmcb->control.int_ctl |= V_GIF_MASK;
	else
		svm->vcpu.arch.hflags |= HF_GIF_MASK;
}

static inline void disable_gif(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}

static inline bool gif_set(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
	else
		return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}

static unsigned long iopm_base;

struct kvm_ldttss_desc {
	u16 limit0;
	u16 base0;
	unsigned base1:8, type:5, dpl:2, p:1;
	unsigned limit1:4, zero0:3, g:1, base2:8;
	u32 base3;
	u32 zero1;
} __attribute__((packed));

struct svm_cpu_data {
	int cpu;

	u64 asid_generation;
	u32 max_asid;
	u32 next_asid;
	u32 min_asid;
	struct kvm_ldttss_desc *tss_desc;

	struct page *save_area;
	struct vmcb *current_vmcb;

	/* index = sev_asid, value = vmcb pointer */
	struct vmcb **sev_vmcbs;
};

static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);

static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};

#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)

static u32 svm_msrpm_offset(u32 msr)
{
	u32 offset;
	int i;

	for (i = 0; i < NUM_MSR_MAPS; i++) {
		if (msr < msrpm_ranges[i] ||
		    msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
			continue;

		offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
		offset += (i * MSRS_RANGE_SIZE);       /* add range offset */

		/* Now we have the u8 offset - but need the u32 offset */
		return offset / 4;
	}

	/* MSR not in any range */
	return MSR_INVALID;
}
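/*
 * Worked example (illustrative): MSR_STAR (0xc0000081) lies 0x81 MSRs into
 * the 0xc0000000 range.  With two permission bits per MSR there are four
 * MSRs per byte, so the byte offset is 0x81 / 4 = 32; adding the 2048-byte
 * range offset gives byte 2080, i.e. u32 offset 520 into the MSR permission
 * map.
 */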

#define MAX_INST_SIZE 15

static inline void clgi(void)
{
	asm volatile (__ex("clgi"));
}

static inline void stgi(void)
{
	asm volatile (__ex("stgi"));
}

static inline void invlpga(unsigned long addr, u32 asid)
{
	asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
}

static int get_npt_level(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	return PT64_ROOT_4LEVEL;
#else
	return PT32E_ROOT_LEVEL;
#endif
}

static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	vcpu->arch.efer = efer;
	if (!npt_enabled && !(efer & EFER_LMA))
		efer &= ~EFER_LME;

	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
}

static int is_external_interrupt(u32 info)
{
	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
}

static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ret = 0;

	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
		ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
	return ret;
}

static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (mask == 0)
		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
	else
		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;

}

static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.next_rip != 0) {
		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
		svm->next_rip = svm->vmcb->control.next_rip;
	}

	if (!svm->next_rip) {
		if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
				EMULATE_DONE)
			printk(KERN_DEBUG "%s: NOP\n", __func__);
		return;
	}
	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
		       __func__, kvm_rip_read(vcpu), svm->next_rip);

	kvm_rip_write(vcpu, svm->next_rip);
	svm_set_interrupt_shadow(vcpu, 0);
}

static void svm_queue_exception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned nr = vcpu->arch.exception.nr;
	bool has_error_code = vcpu->arch.exception.has_error_code;
	bool reinject = vcpu->arch.exception.injected;
	u32 error_code = vcpu->arch.exception.error_code;

	/*
	 * If we are within a nested VM we'd better #VMEXIT and let the guest
	 * handle the exception
	 */
	if (!reinject &&
	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
		return;

	kvm_deliver_exception_payload(&svm->vcpu);

	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);

		/*
		 * For guest debugging where we have to reinject #BP if some
		 * INT3 is guest-owned:
		 * Emulate nRIP by moving RIP forward. Will fail if injection
		 * raises a fault that is not intercepted. Still better than
		 * failing in all cases.
		 */
		skip_emulated_instruction(&svm->vcpu);
		rip = kvm_rip_read(&svm->vcpu);
		svm->int3_rip = rip + svm->vmcb->save.cs.base;
		svm->int3_injected = rip - old_rip;
	}

	svm->vmcb->control.event_inj = nr
		| SVM_EVTINJ_VALID
		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
		| SVM_EVTINJ_TYPE_EXEPT;
	svm->vmcb->control.event_inj_err = error_code;
}

static void svm_init_erratum_383(void)
{
	u32 low, high;
	int err;
	u64 val;

	if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
		return;

	/* Use _safe variants to not break nested virtualization */
	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
	if (err)
		return;

	val |= (1ULL << 47);

	low  = lower_32_bits(val);
	high = upper_32_bits(val);

	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);

	erratum_383_found = true;
}

static void svm_init_osvw(struct kvm_vcpu *vcpu)
{
	/*
	 * Guests should see errata 400 and 415 as fixed (assuming that
	 * HLT and IO instructions are intercepted).
	 */
	vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
	vcpu->arch.osvw.status = osvw_status & ~(6ULL);

	/*
	 * By increasing VCPU's osvw.length to 3 we are telling the guest that
	 * all osvw.status bits inside that length, including bit 0 (which is
	 * reserved for erratum 298), are valid. However, if host processor's
	 * osvw_len is 0 then osvw_status[0] carries no information. We need to
	 * be conservative here and therefore we tell the guest that erratum 298
	 * is present (because we really don't know).
	 */
	if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
		vcpu->arch.osvw.status |= 1;
}

static int has_svm(void)
{
	const char *msg;

	if (!cpu_has_svm(&msg)) {
		printk(KERN_INFO "has_svm: %s\n", msg);
		return 0;
	}

	return 1;
}

static void svm_hardware_disable(void)
{
	/* Make sure we clean up behind us */
	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);

	cpu_svm_disable();

	amd_pmu_disable_virt();
}

static int svm_hardware_enable(void)
{

	struct svm_cpu_data *sd;
	uint64_t efer;
	struct desc_struct *gdt;
	int me = raw_smp_processor_id();

	rdmsrl(MSR_EFER, efer);
	if (efer & EFER_SVME)
		return -EBUSY;

	if (!has_svm()) {
		pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
		return -EINVAL;
	}
	sd = per_cpu(svm_data, me);
	if (!sd) {
		pr_err("%s: svm_data is NULL on %d\n", __func__, me);
		return -EINVAL;
	}

	sd->asid_generation = 1;
	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
	sd->next_asid = sd->max_asid + 1;
	sd->min_asid = max_sev_asid + 1;

	gdt = get_current_gdt_rw();
	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);

	wrmsrl(MSR_EFER, efer | EFER_SVME);

	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);

	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
		__this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
	}


	/*
	 * Get OSVW bits.
	 *
	 * Note that it is possible to have a system with mixed processor
	 * revisions and therefore different OSVW bits. If bits are not the same
	 * on different processors then choose the worst case (i.e. if erratum
	 * is present on one processor and not on another then assume that the
	 * erratum is present everywhere).
	 */
	if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
		uint64_t len, status = 0;
		int err;

		len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
		if (!err)
			status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
						      &err);

		if (err)
			osvw_status = osvw_len = 0;
		else {
			if (len < osvw_len)
				osvw_len = len;
			osvw_status |= status;
			osvw_status &= (1ULL << osvw_len) - 1;
		}
	} else
		osvw_status = osvw_len = 0;

	svm_init_erratum_383();

	amd_pmu_enable_virt();

	return 0;
}

static void svm_cpu_uninit(int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());

	if (!sd)
		return;

	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
	kfree(sd->sev_vmcbs);
	__free_page(sd->save_area);
	kfree(sd);
}

static int svm_cpu_init(int cpu)
{
	struct svm_cpu_data *sd;
	int r;

	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
	if (!sd)
		return -ENOMEM;
	sd->cpu = cpu;
	r = -ENOMEM;
	sd->save_area = alloc_page(GFP_KERNEL);
	if (!sd->save_area)
		goto err_1;

	if (svm_sev_enabled()) {
		r = -ENOMEM;
		sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
					      sizeof(void *),
					      GFP_KERNEL);
		if (!sd->sev_vmcbs)
			goto err_1;
	}

	per_cpu(svm_data, cpu) = sd;

	return 0;

err_1:
	kfree(sd);
	return r;

}

static bool valid_msr_intercept(u32 index)
{
	int i;

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
		if (direct_access_msrs[i].index == index)
			return true;

	return false;
}

static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
{
	u8 bit_write;
	unsigned long tmp;
	u32 offset;
	u32 *msrpm;

	msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
				      to_svm(vcpu)->msrpm;

	offset    = svm_msrpm_offset(msr);
	bit_write = 2 * (msr & 0x0f) + 1;
	tmp       = msrpm[offset];

	BUG_ON(offset == MSR_INVALID);

	return !!test_bit(bit_write,  &tmp);
}

static void set_msr_interception(u32 *msrpm, unsigned msr,
				 int read, int write)
{
	u8 bit_read, bit_write;
	unsigned long tmp;
	u32 offset;

	/*
	 * If this warning triggers extend the direct_access_msrs list at the
	 * beginning of the file
	 */
	WARN_ON(!valid_msr_intercept(msr));

	offset    = svm_msrpm_offset(msr);
	bit_read  = 2 * (msr & 0x0f);
	bit_write = 2 * (msr & 0x0f) + 1;
	tmp       = msrpm[offset];

	BUG_ON(offset == MSR_INVALID);

	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);

	msrpm[offset] = tmp;
}
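/*
 * Illustrative usage: set_msr_interception(msrpm, MSR_IA32_SPEC_CTRL, 1, 1)
 * clears both permission bits so the guest accesses the MSR directly, while
 * read/write arguments of 0 set the bits again and re-enable interception.
 */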

static void svm_vcpu_init_msrpm(u32 *msrpm)
{
	int i;

	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		if (!direct_access_msrs[i].always)
			continue;

		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
	}
}

static void add_msr_offset(u32 offset)
{
	int i;

	for (i = 0; i < MSRPM_OFFSETS; ++i) {

		/* Offset already in list? */
		if (msrpm_offsets[i] == offset)
			return;

		/* Slot used by another offset? */
		if (msrpm_offsets[i] != MSR_INVALID)
			continue;

		/* Add offset to list */
		msrpm_offsets[i] = offset;

		return;
	}

	/*
	 * If this BUG triggers the msrpm_offsets table has an overflow. Just
	 * increase MSRPM_OFFSETS in this case.
	 */
	BUG();
}

static void init_msrpm_offsets(void)
{
	int i;

	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		u32 offset;

		offset = svm_msrpm_offset(direct_access_msrs[i].index);
		BUG_ON(offset == MSR_INVALID);

		add_msr_offset(offset);
	}
}

static void svm_enable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}

static void svm_disable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}

static void disable_nmi_singlestep(struct vcpu_svm *svm)
{
	svm->nmi_singlestep = false;

	if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
		/* Clear our flags if they were not set by the guest */
		if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
			svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
		if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
			svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
	}
}

/* Note:
 * This hash table is used to map VM_ID to a struct kvm_svm,
 * when handling AMD IOMMU GALOG notification to schedule in
 * a particular vCPU.
 */
#define SVM_VM_DATA_HASH_BITS	8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);

/* Note:
 * This function is called from IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
static int avic_ga_log_notifier(u32 ga_tag)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	/* Note:
	 * At this point, the IOMMU should have already set the pending
	 * bit in the vAPIC backing page. So, we just need to schedule
	 * in the vcpu.
	 */
	if (vcpu)
		kvm_vcpu_wake_up(vcpu);

	return 0;
}

static __init int sev_hardware_setup(void)
{
	struct sev_user_data_status *status;
	int rc;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = cpuid_ecx(0x8000001F);

	if (!max_sev_asid)
		return 1;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = cpuid_edx(0x8000001F);

	/* Initialize SEV ASID bitmap */
	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_asid_bitmap)
		return 1;

	status = kmalloc(sizeof(*status), GFP_KERNEL);
	if (!status)
		return 1;

	/*
	 * Check SEV platform status.
	 *
	 * PLATFORM_STATUS can be called in any state, if we failed to query
	 * the PLATFORM status then either PSP firmware does not support SEV
	 * feature or SEV firmware is dead.
	 */
	rc = sev_platform_status(status, NULL);
	if (rc)
		goto err;

	pr_info("SEV supported\n");

err:
	kfree(status);
	return rc;
}

static void grow_ple_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_control_area *control = &svm->vmcb->control;
	int old = control->pause_filter_count;

	control->pause_filter_count = __grow_ple_window(old,
							pause_filter_count,
							pause_filter_count_grow,
							pause_filter_count_max);

	if (control->pause_filter_count != old)
		mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	trace_kvm_ple_window_grow(vcpu->vcpu_id,
				  control->pause_filter_count, old);
}
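/*
 * Per the module parameter comments above, the default grow modifier doubles
 * the per-vCPU pause filter count (capped at pause_filter_count_max), while
 * the default shrink modifier in shrink_ple_window() below resets it to
 * pause_filter_count.
 */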

static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_control_area *control = &svm->vmcb->control;
	int old = control->pause_filter_count;

	control->pause_filter_count =
				__shrink_ple_window(old,
						    pause_filter_count,
						    pause_filter_count_shrink,
						    pause_filter_count);
	if (control->pause_filter_count != old)
		mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	trace_kvm_ple_window_shrink(vcpu->vcpu_id,
				    control->pause_filter_count, old);
}