Commit 830ac852 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull kdump fixes from Peter Anvin:
 "The kexec/kdump people have found several problems with the support
  for loading over 4 GiB that was introduced in this merge cycle.  This
  is partly due to a number of design problems inherent in the way the
  various pieces of kdump fit together (it is pretty horrifically manual
  in many places.)

  After a *lot* of iterations this is the patchset that was agreed upon,
  but of course it is now very late in the cycle.  However, because it
  changes both the syntax and semantics of the crashkernel option, it
  would be desirable to avoid a stable release with the broken
  interfaces."

I'm not happy with the timing, since originally the plan was to release
the final 3.9 tomorrow.  But apparently I'm doing an -rc8 instead...

* 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  kexec: use Crash kernel for Crash kernel low
  x86, kdump: Change crashkernel_high/low= to crashkernel=,high/low
  x86, kdump: Retore crashkernel= to allocate under 896M
  x86, kdump: Set crashkernel_low automatically
parents db93f8b4 157752d8
......@@ -596,9 +596,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
is selected automatically. Check
Documentation/kdump/kdump.txt for further details.
crashkernel_low=size[KMG]
[KNL, x86] parts under 4G.
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
......@@ -606,6 +603,26 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
a memory unit (amount[KMG]). See also
Documentation/kdump/kdump.txt for an example.
crashkernel=size[KMG],high
[KNL, x86_64] range could be above 4G. Allow kernel
to allocate physical memory region from top, so could
be above 4G if system have more than 4G ram installed.
Otherwise memory region will be allocated below 4G, if
available.
It will be ignored if crashkernel=X is specified.
crashkernel=size[KMG],low
[KNL, x86_64] range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
requires at least 64M+32K low memory. Kernel would
try to allocate 72M below 4G automatically.
This one let user to specify own low range under 4G
for second kernel instead.
0: to disable low allocation.
It will be ignored when crashkernel=X,high is not used
or memory reserved is below 4G.
cs89x0_dma= [HW,NET]
Format: <dma>
......
......@@ -507,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
* On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
# define CRASH_KERNEL_ADDR_MAX (512 << 20)
# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20)
# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20)
#else
# define CRASH_KERNEL_ADDR_MAX MAXMEM
# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20)
# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM
#endif
static void __init reserve_crashkernel_low(void)
......@@ -521,18 +524,34 @@ static void __init reserve_crashkernel_low(void)
unsigned long long low_base = 0, low_size = 0;
unsigned long total_low_mem;
unsigned long long base;
bool auto_set = false;
int ret;
total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
/* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem,
&low_size, &base);
if (ret != 0 || low_size <= 0)
if (ret != 0) {
/*
* two parts from lib/swiotlb.c:
* swiotlb size: user specified with swiotlb= or default.
* swiotlb overflow buffer: now is hardcoded to 32k.
* We round it to 8M for other buffers that
* may need to stay low too.
*/
low_size = swiotlb_size_or_default() + (8UL<<20);
auto_set = true;
} else {
/* passed with crashkernel=0,low ? */
if (!low_size)
return;
}
low_base = memblock_find_in_range(low_size, (1ULL<<32),
low_size, alignment);
if (!low_base) {
if (!auto_set)
pr_info("crashkernel low reservation failed - No suitable area found.\n");
return;
......@@ -554,14 +573,22 @@ static void __init reserve_crashkernel(void)
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
/* crashkernel=XM */
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret != 0 || crash_size <= 0) {
/* crashkernel=X,high */
ret = parse_crashkernel_high(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret != 0 || crash_size <= 0)
return;
high = true;
}
/* 0 means: find the address automatically */
if (crash_base <= 0) {
......@@ -569,7 +596,9 @@ static void __init reserve_crashkernel(void)
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
crash_base = memblock_find_in_range(alignment,
CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
high ? CRASH_KERNEL_ADDR_HIGH_MAX :
CRASH_KERNEL_ADDR_LOW_MAX,
crash_size, alignment);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
......
......@@ -200,6 +200,8 @@ extern size_t vmcoreinfo_max_size;
int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
int crash_shrink_memory(unsigned long new_size);
......
......@@ -25,6 +25,7 @@ extern int swiotlb_force;
extern void swiotlb_init(int verbose);
int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
extern unsigned long swiotlb_nr_tbl(void);
unsigned long swiotlb_size_or_default(void);
extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
/*
......
......@@ -55,7 +55,7 @@ struct resource crashk_res = {
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource crashk_low_res = {
.name = "Crash kernel low",
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
......@@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline,
return 0;
}
#define SUFFIX_HIGH 0
#define SUFFIX_LOW 1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
[SUFFIX_HIGH] = ",high",
[SUFFIX_LOW] = ",low",
[SUFFIX_NULL] = NULL,
};
/*
* That function is the entry point for command line parsing and should be
* called from the arch-specific code.
* That function parses "suffix" crashkernel command lines like
*
* crashkernel=size,[high|low]
*
* It returns 0 on success and -EINVAL on failure.
*/
static int __init __parse_crashkernel(char *cmdline,
unsigned long long system_ram,
static int __init parse_crashkernel_suffix(char *cmdline,
unsigned long long *crash_size,
unsigned long long *crash_base,
const char *name)
const char *suffix)
{
char *p = cmdline, *ck_cmdline = NULL;
char *first_colon, *first_space;
char *cur = cmdline;
BUG_ON(!crash_size || !crash_base);
*crash_size = 0;
*crash_base = 0;
*crash_size = memparse(cmdline, &cur);
if (cmdline == cur) {
pr_warn("crashkernel: memory value expected\n");
return -EINVAL;
}
/* check with suffix */
if (strncmp(cur, suffix, strlen(suffix))) {
pr_warn("crashkernel: unrecognized char\n");
return -EINVAL;
}
cur += strlen(suffix);
if (*cur != ' ' && *cur != '\0') {
pr_warn("crashkernel: unrecognized char\n");
return -EINVAL;
}
return 0;
}
static __init char *get_last_crashkernel(char *cmdline,
const char *name,
const char *suffix)
{
char *p = cmdline, *ck_cmdline = NULL;
/* find crashkernel and use the last one if there are more */
p = strstr(p, name);
while (p) {
char *end_p = strchr(p, ' ');
char *q;
if (!end_p)
end_p = p + strlen(p);
if (!suffix) {
int i;
/* skip the one with any known suffix */
for (i = 0; suffix_tbl[i]; i++) {
q = end_p - strlen(suffix_tbl[i]);
if (!strncmp(q, suffix_tbl[i],
strlen(suffix_tbl[i])))
goto next;
}
ck_cmdline = p;
} else {
q = end_p - strlen(suffix);
if (!strncmp(q, suffix, strlen(suffix)))
ck_cmdline = p;
}
next:
p = strstr(p+1, name);
}
if (!ck_cmdline)
return NULL;
return ck_cmdline;
}
static int __init __parse_crashkernel(char *cmdline,
unsigned long long system_ram,
unsigned long long *crash_size,
unsigned long long *crash_base,
const char *name,
const char *suffix)
{
char *first_colon, *first_space;
char *ck_cmdline;
BUG_ON(!crash_size || !crash_base);
*crash_size = 0;
*crash_base = 0;
ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
if (!ck_cmdline)
return -EINVAL;
ck_cmdline += strlen(name);
if (suffix)
return parse_crashkernel_suffix(ck_cmdline, crash_size,
crash_base, suffix);
/*
* if the commandline contains a ':', then that's the extended
* syntax -- if not, it must be the classic syntax
......@@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline,
return 0;
}
/*
* That function is the entry point for command line parsing and should be
* called from the arch-specific code.
*/
int __init parse_crashkernel(char *cmdline,
unsigned long long system_ram,
unsigned long long *crash_size,
unsigned long long *crash_base)
{
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
"crashkernel=");
"crashkernel=", NULL);
}
int __init parse_crashkernel_high(char *cmdline,
unsigned long long system_ram,
unsigned long long *crash_size,
unsigned long long *crash_base)
{
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}
int __init parse_crashkernel_low(char *cmdline,
......@@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline,
unsigned long long *crash_base)
{
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
"crashkernel_low=");
"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}
static void update_vmcoreinfo_note(void)
......
......@@ -105,9 +105,9 @@ setup_io_tlb_npages(char *str)
if (!strcmp(str, "force"))
swiotlb_force = 1;
return 1;
return 0;
}
__setup("swiotlb=", setup_io_tlb_npages);
early_param("swiotlb", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
unsigned long swiotlb_nr_tbl(void)
......@@ -115,6 +115,18 @@ unsigned long swiotlb_nr_tbl(void)
return io_tlb_nslabs;
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
/* default to 64MB */
#define IO_TLB_DEFAULT_SIZE (64UL<<20)
unsigned long swiotlb_size_or_default(void)
{
unsigned long size;
size = io_tlb_nslabs << IO_TLB_SHIFT;
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
......@@ -188,8 +200,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
void __init
swiotlb_init(int verbose)
{
/* default to 64MB */
size_t default_size = 64UL<<20;
size_t default_size = IO_TLB_DEFAULT_SIZE;
unsigned char *vstart;
unsigned long bytes;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment