Commit 652fa53c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'locking-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking fixes from Thomas Gleixner:
 "Three small fixes/updates for the locking core code:

   - Plug a task struct reference leak in the percpu rswem
     implementation.

   - Document the refcount interaction with PID_MAX_LIMIT

   - Improve the 'invalid wait context' data dump in lockdep so it
     contains all information which is required to decode the problem"

* tag 'locking-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/lockdep: Improve 'invalid wait context' splat
  locking/refcount: Document interaction with PID_MAX_LIMIT
  locking/percpu-rwsem: Fix a task_struct refcount
parents 4119bf9f 9a019db0
......@@ -38,11 +38,24 @@
* atomic operations, then the count will continue to edge closer to 0. If it
* reaches a value of 1 before /any/ of the threads reset it to the saturated
* value, then a concurrent refcount_dec_and_test() may erroneously free the
* underlying object. Given the precise timing details involved with the
* round-robin scheduling of each thread manipulating the refcount and the need
* to hit the race multiple times in succession, there doesn't appear to be a
* practical avenue of attack even if using refcount_add() operations with
* larger increments.
* underlying object.
* Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently
* 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
* With the current PID limit, if no batched refcounting operations are used and
* the attacker can't repeatedly trigger kernel oopses in the middle of refcount
* operations, this makes it impossible for a saturated refcount to leave the
* saturation range, even if it is possible for multiple uses of the same
* refcount to nest in the context of a single task:
*
* (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT =
* 0x40000000 / 0x400000 = 0x100 = 256
*
* If hundreds of references are added/removed with a single refcounting
* operation, it may potentially be possible to leave the saturation range; but
* given the precise timing details involved with the round-robin scheduling of
* each thread manipulating the refcount and the need to hit the race multiple
* times in succession, there doesn't appear to be a practical avenue of attack
* even if using refcount_add() operations with larger increments.
*
* Memory ordering
* ===============
......
......@@ -3952,10 +3952,36 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return ret;
}
static inline short task_wait_context(struct task_struct *curr)
{
/*
* Set appropriate wait type for the context; for IRQs we have to take
* into account force_irqthread as that is implied by PREEMPT_RT.
*/
if (curr->hardirq_context) {
/*
* Check if force_irqthreads will run us threaded.
*/
if (curr->hardirq_threaded || curr->irq_config)
return LD_WAIT_CONFIG;
return LD_WAIT_SPIN;
} else if (curr->softirq_context) {
/*
* Softirqs are always threaded.
*/
return LD_WAIT_CONFIG;
}
return LD_WAIT_MAX;
}
static int
print_lock_invalid_wait_context(struct task_struct *curr,
struct held_lock *hlock)
{
short curr_inner;
if (!debug_locks_off())
return 0;
if (debug_locks_silent)
......@@ -3971,6 +3997,10 @@ print_lock_invalid_wait_context(struct task_struct *curr,
print_lock(hlock);
pr_warn("other info that might help us debug this:\n");
curr_inner = task_wait_context(curr);
pr_warn("context-{%d:%d}\n", curr_inner, curr_inner);
lockdep_print_held_locks(curr);
pr_warn("stack backtrace:\n");
......@@ -4017,26 +4047,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
}
depth++;
/*
* Set appropriate wait type for the context; for IRQs we have to take
* into account force_irqthread as that is implied by PREEMPT_RT.
*/
if (curr->hardirq_context) {
/*
* Check if force_irqthreads will run us threaded.
*/
if (curr->hardirq_threaded || curr->irq_config)
curr_inner = LD_WAIT_CONFIG;
else
curr_inner = LD_WAIT_SPIN;
} else if (curr->softirq_context) {
/*
* Softirqs are always threaded.
*/
curr_inner = LD_WAIT_CONFIG;
} else {
curr_inner = LD_WAIT_MAX;
}
curr_inner = task_wait_context(curr);
for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;
......
......@@ -118,14 +118,15 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
unsigned int mode, int wake_flags,
void *key)
{
struct task_struct *p = get_task_struct(wq_entry->private);
bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
struct percpu_rw_semaphore *sem = key;
struct task_struct *p;
/* concurrent against percpu_down_write(), can get stolen */
if (!__percpu_rwsem_trylock(sem, reader))
return 1;
p = get_task_struct(wq_entry->private);
list_del_init(&wq_entry->entry);
smp_store_release(&wq_entry->private, NULL);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment