#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>

/*
 *	TLB flushing, formerly SMP-only
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway).
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
 *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

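/*
 * Pick an ASID (PCID) slot for @next.  If this CPU already has a slot
 * whose ctx_id matches @next's context, reuse it and only request a
 * flush when that slot's cached tlb_gen is behind @next_tlb_gen.
 * Otherwise hand out the next dynamic slot round-robin and force a
 * flush.  Without PCID there is only slot 0, which is always flushed.
 */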
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
			    u16 *new_asid, bool *need_flush)
{
	u16 asid;

	if (!static_cpu_has(X86_FEATURE_PCID)) {
		*new_asid = 0;
		*need_flush = true;
		return;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
		    next->context.ctx_id)
			continue;

		*new_asid = asid;
		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
			       next_tlb_gen);
		return;
	}

	/*
	 * We don't currently own an ASID slot on this CPU.
	 * Allocate a slot.
	 */
	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
	if (*new_asid >= TLB_NR_DYN_ASIDS) {
		*new_asid = 0;
		this_cpu_write(cpu_tlbstate.next_asid, 1);
	}
	*need_flush = true;
}

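/*
 * Switch this CPU over to init_mm, dropping its reference to the
 * previously loaded user mm.  Used by idle and similar paths that want
 * to stop touching a user mm (see the intel_idle note below).
 */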
void leave_mm(int cpu)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
	 * If so, our callers still expect us to flush the TLB, but there
	 * aren't any user TLB entries in init_mm to worry about.
	 *
	 * This needs to happen before any other sanity checks due to
	 * intel_idle's shenanigans.
	 */
	if (loaded_mm == &init_mm)
		return;

	/* Warn if we're not lazy. */
	WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));

	switch_mm(NULL, &init_mm, NULL);
}

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_* to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 *
		 * (This is far from being a fully correct recovery.
		 *  Architecturally, the CPU could prefetch something
		 *  back into an incorrect ASID slot and leave it there
		 *  to cause trouble down the road.  It's better than
		 *  nothing, though.)
		 */
		__flush_tlb_all();
	}
#endif

	if (real_prev == next) {
		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			  next->context.ctx_id);

		if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
			/*
			 * There's nothing to do: we weren't lazy, and we
			 * aren't changing our mm.  We don't need to flush
			 * anything, nor do we need to update CR3, CR4, or
			 * LDTR.
			 */
			return;
		}

		/* Resume remote flushes and then read tlb_gen. */
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
		    next_tlb_gen) {
			/*
			 * Ideally, we'd have a flush_tlb() variant that
			 * takes the known CR3 value as input.  This would
			 * be faster on Xen PV and on hypothetical CPUs
			 * on which INVPCID is fast.
			 */
			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
				       next_tlb_gen);
			write_cr3(build_cr3(next, prev_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
					TLB_FLUSH_ALL);
		}

		/*
		 * We just exited lazy mode, which means that CR4 and/or LDTR
		 * may be stale.  (Changes to the required CR4 and LDTR states
		 * are not reflected in tlb_gen.)
		 */
	} else {
		u16 new_asid;
		bool need_flush;

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Forcibly
			 * map it.
			 */
			unsigned int index = pgd_index(current_stack_pointer);
			pgd_t *pgd = next->pgd + index;

			if (unlikely(pgd_none(*pgd)))
				set_pgd(pgd, init_mm.pgd[index]);
		}

		/* Stop remote flushes for the previous mm */
		if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

		VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		if (need_flush) {
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
			write_cr3(build_cr3(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
					TLB_FLUSH_ALL);
		} else {
			/* The new ASID is already up to date. */
			write_cr3(build_cr3_noflush(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
		}

		this_cpu_write(cpu_tlbstate.loaded_mm, next);
		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
	}

	load_mm_cr4(next);
	switch_ldt(real_prev, next);
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear; CPU hotplug can do this).
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
	int i;
	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
	unsigned long cr3 = __read_cr3();

	/* Assert that CR3 already references the right mm. */
	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

	/*
	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
	 * doesn't work like other CR4 bits because it can only be set from
	 * long mode.)
	 */
	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
		!(cr4_read_shadow() & X86_CR4_PCIDE));

	/* Force ASID 0 and force a TLB flush. */
	write_cr3(build_cr3(mm, 0));

	/* Reinitialize tlbstate. */
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func_common()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing and the
 * atomic64_read operation won't be reordered by the compiler.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
				  bool local, enum tlb_flush_reason reason)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
		/*
		 * We're in lazy mode -- don't flush.  We can get here on
		 * remote flushes due to races and on local flushes if a
		 * kernel thread coincidentally flushes the mm it's lazily
		 * still using.
		 */
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush to
		 * be handled can catch us all the way up, leaving no work for
		 * the second flush.
		 */
		trace_tlb_flush(reason, 0);
		return;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get to this point, we know that our TLB is out of date.
	 * This does not strictly imply that we need to flush (it's
	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
	 * going to need to flush in the very near future, so we might
	 * as well get it over with.
	 *
	 * The only question is whether to do a full or partial flush.
	 *
	 * We do a partial flush if requested and two extra conditions
	 * are met:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
	 *    we've always done all needed flushes to catch up to
	 *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
	 *    f->new_tlb_gen == 3, then we know that the flush needed to bring
	 *    us up to date for tlb_gen 3 is the partial flush we're
	 *    processing.
	 *
	 *    As an example of why this check is needed, suppose that there
	 *    are two concurrent flushes.  The first is a full flush that
	 *    changes context.tlb_gen from 1 to 2.  The second is a partial
	 *    flush that changes context.tlb_gen from 2 to 3.  If they get
	 *    processed on this CPU in reverse order, we'll see
	 *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
	 *    If we were to use __flush_tlb_single() and set local_tlb_gen to
 *    3, we'd break the invariant: we'd update local_tlb_gen above
	 *    1 without the full flush that's needed for tlb_gen 2.
	 *
 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
	 *    Partial TLB flushes are not all that much cheaper than full TLB
	 *    flushes, so it seems unlikely that it would be a performance win
	 *    to do a partial flush if that won't bring our TLB fully up to
	 *    date.  By doing a full flush instead, we can increase
	 *    local_tlb_gen all the way to mm_tlb_gen and we can probably
	 *    avoid another flush in the very near future.
	 */
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long addr;
		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;

		addr = f->start;
		while (addr < f->end) {
			__flush_tlb_single(addr);
			addr += PAGE_SIZE;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
		trace_tlb_flush(reason, nr_pages);
	} else {
		/* Full flush. */
		local_flush_tlb();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
		trace_tlb_flush(reason, TLB_FLUSH_ALL);
	}

	/* Both paths above update our state to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}

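/*
 * Thin wrappers around flush_tlb_func_common(): the local variant runs
 * on the initiating CPU with interrupts disabled; the remote variant is
 * the smp_call_function_many() callback run on the target CPUs.
 */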
static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
{
	const struct flush_tlb_info *f = info;

	flush_tlb_func_common(f, true, reason);
}

static void flush_tlb_func_remote(void *info)
{
	const struct flush_tlb_info *f = info;

	inc_irq_stat(irq_tlb_count);

	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
		return;

	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

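/*
 * Flush on other CPUs: run flush_tlb_func_remote() on every CPU in
 * @cpumask, via smp_call_function_many() or, on UV systems, the
 * Broadcast Assist Unit (see the comment below).
 */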
void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
	else
		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
				(info->end - info->start) >> PAGE_SHIFT);

	if (is_uv_system()) {
		/*
		 * This whole special case is confused.  UV has a "Broadcast
		 * Assist Unit", which seems to be a fancy way to send IPIs.
		 * Back when x86 used an explicit TLB flush IPI, UV was
		 * optimized to use its own mechanism.  These days, x86 uses
		 * smp_call_function_many(), but UV still uses a manual IPI,
		 * and that IPI's action is out of date -- it does a manual
		 * flush instead of calling flush_tlb_func_remote().  This
		 * means that the percpu tlb_gen variables won't be updated
		 * and we'll do pointless flushes on future context switches.
		 *
		 * Rather than hooking native_flush_tlb_others() here, I think
		 * that UV should be updated so that smp_call_function_many(),
		 * etc, are optimal on UV.
		 */
		unsigned int cpu;

		cpu = smp_processor_id();
		cpumask = uv_flush_tlb_others(cpumask, info);
		if (cpumask)
			smp_call_function_many(cpumask, flush_tlb_func_remote,
					       (void *)info, 1);
		return;
	}
	smp_call_function_many(cpumask, flush_tlb_func_remote,
			       (void *)info, 1);
}

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

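/*
 * Flush the TLB entries for [start, end) in @mm on every CPU that is
 * currently running it.  Small, non-hugetlb ranges (up to
 * tlb_single_page_flush_ceiling pages) are flushed page by page;
 * anything larger gets a full flush.
 */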
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned long vmflag)
{
	int cpu;

	struct flush_tlb_info info = {
		.mm = mm,
	};

	cpu = get_cpu();

	/* This is also a barrier that synchronizes with switch_mm(). */
	info.new_tlb_gen = inc_mm_tlb_gen(mm);

	/* Should we flush just the requested range? */
	if ((end != TLB_FLUSH_ALL) &&
	    !(vmflag & VM_HUGETLB) &&
	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
		info.start = start;
		info.end = end;
	} else {
		info.start = 0UL;
		info.end = TLB_FLUSH_ALL;
	}

	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), &info);

	put_cpu();
}


static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();
}

void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

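/*
 * Kernel-address flushes have no mm to track, so they are either a full
 * flush on every CPU or a per-page invlpg loop, reusing the same
 * ceiling as the user-space path above.
 */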
static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* Flush the range one page at a time with 'invlpg' */
	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
		__flush_tlb_single(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{

	/* Balance this like a user space task's flush; a bit conservative */
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	} else {
		struct flush_tlb_info info;
		info.start = start;
		info.end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	}
}

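/*
 * Flush everything batched up by the unmap code: do a full flush on
 * this CPU if it is in @batch->cpumask, IPI the rest, then clear the
 * mask so the batch can be reused.
 */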
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	struct flush_tlb_info info = {
		.mm = NULL,
		.start = 0UL,
		.end = TLB_FLUSH_ALL,
	};

	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
		flush_tlb_others(&batch->cpumask, &info);

	cpumask_clear(&batch->cpumask);

	put_cpu();
}

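/*
 * Expose tlb_single_page_flush_ceiling through debugfs so the partial
 * vs. full flush cutoff can be tuned at runtime, e.g. (assuming debugfs
 * is mounted at /sys/kernel/debug):
 *
 *	echo 50 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 */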
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
			     size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
		 const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	ssize_t len;
	int ceiling;

	len = min(count, sizeof(buf) - 1);
	if (copy_from_user(buf, user_buf, len))
		return -EFAULT;

	buf[len] = '\0';
	if (kstrtoint(buf, 0, &ceiling))
		return -EINVAL;

	if (ceiling < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = ceiling;
	return count;
}

static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlbflush);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);