/*
 * handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/string.h>
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/io.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

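/*
 * Identity-map the single 2MB page containing @addr, allocating any
 * missing pud/pmd tables from the image's control pages.
 */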
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
				unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;
	struct page *page;
	int result = -ENOMEM;

	addr &= PMD_MASK;
	pgd += pgd_index(addr);
	if (!pgd_present(*pgd)) {
		page = kimage_alloc_control_pages(image, 0);
		if (!page)
			goto out;
		pud = (pud_t *)page_address(page);
		memset(pud, 0, PAGE_SIZE);
		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
	}
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud)) {
		page = kimage_alloc_control_pages(image, 0);
		if (!page)
			goto out;
		pmd = (pmd_t *)page_address(page);
		memset(pmd, 0, PAGE_SIZE);
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
	}
	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
	result = 0;
out:
	return result;
}

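/*
 * Fill one page of pmd entries with executable 2MB identity mappings,
 * covering PUD_SIZE (1GB) starting at @addr.
 */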
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
	unsigned long end_addr;

	addr &= PAGE_MASK;
	end_addr = addr + PUD_SIZE;
	while (addr < end_addr) {
		set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
		addr += PMD_SIZE;
	}
}

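/*
 * Populate a pud-level table with identity mappings up to @last_addr,
 * allocating one pmd table per 1GB region and clearing the entries
 * that end up unused.
 */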
static int init_level3_page(struct kimage *image, pud_t *level3p,
				unsigned long addr, unsigned long last_addr)
{
	unsigned long end_addr;
	int result;

	result = 0;
	addr &= PAGE_MASK;
	end_addr = addr + PGDIR_SIZE;
	while ((addr < last_addr) && (addr < end_addr)) {
		struct page *page;
		pmd_t *level2p;

		page = kimage_alloc_control_pages(image, 0);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		level2p = (pmd_t *)page_address(page);
		init_level2_page(level2p, addr);
		set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
		addr += PUD_SIZE;
	}
	/* clear the unused entries */
	while (addr < end_addr) {
		pud_clear(level3p++);
		addr += PUD_SIZE;
	}
out:
	return result;
}


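/*
 * Build the top level of the identity map: allocate and fill a pud
 * table for each populated pgd entry up to @last_addr, then clear the
 * remaining entries.
 */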
static int init_level4_page(struct kimage *image, pgd_t *level4p,
				unsigned long addr, unsigned long last_addr)
{
	unsigned long end_addr;
	int result;

	result = 0;
	addr &= PAGE_MASK;
	end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
	while ((addr < last_addr) && (addr < end_addr)) {
		struct page *page;
		pud_t *level3p;

		page = kimage_alloc_control_pages(image, 0);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		level3p = (pud_t *)page_address(page);
		result = init_level3_page(image, level3p, addr, last_addr);
		if (result)
			goto out;
		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
		addr += PGDIR_SIZE;
	}
	/* clear the unused entries */
	while (addr < end_addr) {
		pgd_clear(level4p++);
		addr += PGDIR_SIZE;
	}
out:
	return result;
}

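/* Free the pages backing the transition page table, if any. */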
static void free_transition_pgtable(struct kimage *image)
{
	free_page((unsigned long)image->arch.pud);
	free_page((unsigned long)image->arch.pmd);
	free_page((unsigned long)image->arch.pte);
}

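/*
 * Map the kernel virtual address of relocate_kernel() to the physical
 * copy in the control page, so that execution can continue at the same
 * virtual address after the switch to the identity-mapped tables.
 */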
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long vaddr, paddr;
	int result = -ENOMEM;

	vaddr = (unsigned long)relocate_kernel;
	paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
	pgd += pgd_index(vaddr);
	if (!pgd_present(*pgd)) {
		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
		if (!pud)
			goto err;
		image->arch.pud = pud;
		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
	}
	pud = pud_offset(pgd, vaddr);
	if (!pud_present(*pud)) {
		pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
		if (!pmd)
			goto err;
		image->arch.pmd = pmd;
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
	}
	pmd = pmd_offset(pud, vaddr);
	if (!pmd_present(*pmd)) {
		pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
		if (!pte)
			goto err;
		image->arch.pte = pte;
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
	}
	pte = pte_offset_kernel(pmd, vaddr);
	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
	return 0;
err:
	free_transition_pgtable(image);
	return result;
}

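/*
 * Build the complete identity-mapped page table used during the
 * relocation: all of 0 ... max_pfn, the 2MB page holding image->start,
 * and the transition mapping for relocate_kernel().
 */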
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
	pgd_t *level4p;
	int result;
	level4p = (pgd_t *)__va(start_pgtable);
	result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
	if (result)
		return result;
	/*
	 * image->start may be outside 0 ~ max_pfn, for example when
	 * jumping back to the original kernel from a kexec'ed kernel.
	 */
	result = init_one_level2_page(image, level4p, image->start);
	if (result)
		return result;
	return init_transition_pgtable(image, level4p);
}

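/* Point the IDT register at the given descriptor table. */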
static void set_idt(void *newidt, u16 limit)
{
	struct desc_ptr curidt;

	/* x86-64 supports unaligned loads & stores */
	curidt.size    = limit;
	curidt.address = (unsigned long)newidt;

	__asm__ __volatile__ (
		"lidtq %0\n"
		: : "m" (curidt)
		);
}


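/* Point the GDT register at the given descriptor table. */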
static void set_gdt(void *newgdt, u16 limit)
{
	struct desc_ptr curgdt;

	/* x86-64 supports unaligned loads & stores */
	curgdt.size    = limit;
	curgdt.address = (unsigned long)newgdt;

	__asm__ __volatile__ (
		"lgdtq %0\n"
		: : "m" (curgdt)
		);
}

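/*
 * Reload the data segment registers with __KERNEL_DS so their hidden
 * descriptor caches hold known-good values before the GDT is replaced.
 */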
static void load_segments(void)
{
	__asm__ __volatile__ (
		"\tmovl %0,%%ds\n"
		"\tmovl %0,%%es\n"
		"\tmovl %0,%%ss\n"
		"\tmovl %0,%%fs\n"
		"\tmovl %0,%%gs\n"
		: : "a" (__KERNEL_DS) : "memory"
		);
}

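/*
 * Called at image load time: build the identity-mapped page table in
 * the control pages so that machine_kexec() cannot fail later.
 */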
int machine_kexec_prepare(struct kimage *image)
{
	unsigned long start_pgtable;
	int result;

	/* Calculate the offsets */
	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;

	/* Set up the identity-mapped 64-bit page table */
	result = init_pgtable(image, start_pgtable);
	if (result)
		return result;

	return 0;
}

void machine_kexec_cleanup(struct kimage *image)
{
	free_transition_pgtable(image);
}

/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 */
void machine_kexec(struct kimage *image)
{
	unsigned long page_list[PAGES_NR];
	void *control_page;

	tracer_disable();

	/* Interrupts aren't acceptable while we reboot */
	local_irq_disable();

	control_page = page_address(image->control_code_page) + PAGE_SIZE;
	memcpy(control_page, relocate_kernel, PAGE_SIZE);

	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
	page_list[PA_TABLE_PAGE] =
	  (unsigned long)__pa(page_address(image->control_code_page));

	/*
	 * The segment registers are funny things, they have both a
	 * visible and an invisible part.  Whenever the visible part is
	 * set to a specific selector, the invisible part is loaded
	 * from a table in memory.  At no other time is the
	 * descriptor table in memory accessed.
	 *
	 * I take advantage of this here by force loading the
	 * segments, before I zap the gdt with an invalid value.
	 */
	load_segments();
	/*
	 * The gdt & idt are now invalid.
	 * If you want to load them you must set up your own idt & gdt.
	 */
	set_gdt(phys_to_virt(0), 0);
	set_idt(phys_to_virt(0), 0);

	/* now call it */
	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
			image->start);
}

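/*
 * Export the symbols that dump-analysis tools (e.g. makedumpfile) need
 * in order to interpret a vmcore produced by this kernel.
 */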
void arch_crash_save_vmcoreinfo(void)
{
	VMCOREINFO_SYMBOL(phys_base);
	VMCOREINFO_SYMBOL(init_level4_pgt);

#ifdef CONFIG_NUMA
	VMCOREINFO_SYMBOL(node_data);
	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
}