/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
 */

#include <asm/uaccess.h>

#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
56
#include <linux/task_io_accounting_ops.h>
Linus Torvalds's avatar
Linus Torvalds committed
57
#include <linux/init.h>
58
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
59
#include <linux/file.h>
Al Viro's avatar
Al Viro committed
60
#include <linux/fdtable.h>
Linus Torvalds's avatar
Linus Torvalds committed
61
62
63
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
64
#include <linux/mnt_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
65
#include <linux/mm.h>
66
#include <linux/swap.h>
67
#include <linux/rcupdate.h>
Linus Torvalds's avatar
Linus Torvalds committed
68
#include <linux/kallsyms.h>
Ken Chen's avatar
Ken Chen committed
69
#include <linux/stacktrace.h>
70
#include <linux/resource.h>
Kees Cook's avatar
Kees Cook committed
71
#include <linux/module.h>
Linus Torvalds's avatar
Linus Torvalds committed
72
73
74
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
75
#include <linux/tracehook.h>
Andrew Morton's avatar
Andrew Morton committed
76
#include <linux/printk.h>
77
#include <linux/cgroup.h>
Linus Torvalds's avatar
Linus Torvalds committed
78
79
#include <linux/cpuset.h>
#include <linux/audit.h>
Al Viro's avatar
Al Viro committed
80
#include <linux/poll.h>
81
#include <linux/nsproxy.h>
82
#include <linux/oom.h>
83
#include <linux/elf.h>
84
#include <linux/pid_namespace.h>
85
#include <linux/user_namespace.h>
86
#include <linux/fs_struct.h>
87
#include <linux/slab.h>
88
#include <linux/flex_array.h>
89
#include <linux/posix-timers.h>
90
91
92
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
93
#include <trace/events/oom.h>
Linus Torvalds's avatar
Linus Torvalds committed
94
#include "internal.h"
95
#include "fd.h"
Linus Torvalds's avatar
Linus Torvalds committed
96

97
98
99
100
101
102
103
104
105
106
/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */

Linus Torvalds's avatar
Linus Torvalds committed
107
struct pid_entry {
108
	const char *name;
109
	int len;
Al Viro's avatar
Al Viro committed
110
	umode_t mode;
111
	const struct inode_operations *iop;
112
	const struct file_operations *fop;
113
	union proc_op op;
Linus Torvalds's avatar
Linus Torvalds committed
114
115
};

116
/*
 * Initializer helpers for struct pid_entry.
 *
 * NOD() is the generic form.  It computes .len with sizeof(), so NAME has
 * to be a string literal (or array), never a plain pointer.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = (MODE),					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

/* Directory: caller names the inode and file operation tables. */
#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* Symlink with rwx for everybody, resolved through get_link. */
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
/* Regular file with its own file operations and no inode operations. */
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/* Regular file served by proc_single_file_operations via a show callback. */
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)), 			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )
Linus Torvalds's avatar
Linus Torvalds committed
137

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
 * Return how many of the @n entries in @entries are directories.  This is
 * the hardlink count contributed by the table; the "." and ".." links are
 * not part of the result.
 */
static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
	unsigned int n)
{
	unsigned int nr_dirs = 0;

	/* Order does not matter for a count, so walk the table backwards. */
	while (n--) {
		if (S_ISDIR(entries[n].mode))
			nr_dirs++;
	}

	return nr_dirs;
}

157
static int get_task_root(struct task_struct *task, struct path *root)
Linus Torvalds's avatar
Linus Torvalds committed
158
{
Hugh Dickins's avatar
Hugh Dickins committed
159
160
	int result = -ENOENT;

161
	task_lock(task);
162
163
	if (task->fs) {
		get_fs_root(task->fs, root);
Hugh Dickins's avatar
Hugh Dickins committed
164
165
		result = 0;
	}
166
	task_unlock(task);
Hugh Dickins's avatar
Hugh Dickins committed
167
	return result;
168
169
}

170
static int proc_cwd_link(struct dentry *dentry, struct path *path)
171
{
172
	struct task_struct *task = get_proc_task(d_inode(dentry));
173
	int result = -ENOENT;
174
175

	if (task) {
176
177
178
179
180
181
		task_lock(task);
		if (task->fs) {
			get_fs_pwd(task->fs, path);
			result = 0;
		}
		task_unlock(task);
182
183
		put_task_struct(task);
	}
Linus Torvalds's avatar
Linus Torvalds committed
184
185
186
	return result;
}

187
static int proc_root_link(struct dentry *dentry, struct path *path)
Linus Torvalds's avatar
Linus Torvalds committed
188
{
189
	struct task_struct *task = get_proc_task(d_inode(dentry));
Linus Torvalds's avatar
Linus Torvalds committed
190
	int result = -ENOENT;
191
192

	if (task) {
193
		result = get_task_root(task, path);
194
195
		put_task_struct(task);
	}
Linus Torvalds's avatar
Linus Torvalds committed
196
197
198
	return result;
}

199
200
/*
 * Read handler that copies a task's command line to userspace.
 *
 * @file:   open proc file; the target task is found through its inode
 * @buf:    userspace destination
 * @_count: maximum number of bytes to copy
 * @pos:    file position; advanced by the number of bytes returned
 *
 * The ARGV area [arg_start, arg_end) is read from the target mm one page
 * at a time.  The last byte of that area decides the layout:
 *  - NUL: the command line is the whole ARGV area (a set of strings);
 *  - otherwise: the command line is treated as a single string that starts
 *    in ARGV and may run on into the ENVP area [env_start, env_end); it is
 *    cut at the first NUL byte found.
 *
 * Returns the number of bytes copied, 0 at end of data (or when the task
 * has no mm / no command line yet), -ESRCH if the task is gone, -ENOMEM if
 * the bounce page cannot be allocated, -EFAULT on a userspace copy fault,
 * or a negative value reported by access_remote_vm().
 */
static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
				     size_t _count, loff_t *pos)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	char *page;
	unsigned long count = _count;
	unsigned long arg_start, arg_end, env_start, env_end;
	unsigned long len1, len2, len;
	unsigned long p;
	char c;
	ssize_t rv;

	BUG_ON(*pos < 0);

	tsk = get_proc_task(file_inode(file));
	if (!tsk)
		return -ESRCH;
	mm = get_task_mm(tsk);
	put_task_struct(tsk);
	if (!mm)
		return 0;
	/* Check if process spawned far enough to have cmdline. */
	if (!mm->env_end) {
		rv = 0;
		goto out_mmput;
	}

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page) {
		rv = -ENOMEM;
		goto out_mmput;
	}

	/* Take one consistent snapshot of the four boundaries. */
	down_read(&mm->mmap_sem);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	/*
	 * The boundaries are plain values stored in the mm.  A read handler
	 * must not bring the machine down when they are inconsistent, so
	 * report an empty command line / empty environment instead.
	 * NOTE(review): these fields are presumably settable from userspace
	 * via prctl(PR_SET_MM_*) without an ordering check -- confirm.
	 */
	if (arg_start > arg_end) {
		rv = 0;
		goto out_free_page;
	}
	if (env_start > env_end)
		env_start = env_end;

	len1 = arg_end - arg_start;
	len2 = env_end - env_start;

	/* Empty ARGV. */
	if (len1 == 0) {
		rv = 0;
		goto out_free_page;
	}
	/*
	 * Inherently racy -- command line shares address space
	 * with code and data.
	 */
	rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
	if (rv <= 0)
		goto out_free_page;

	rv = 0;

	if (c == '\0') {
		/* Command line (set of strings) occupies whole ARGV. */
		if (len1 <= *pos)
			goto out_free_page;

		p = arg_start + *pos;
		len = len1 - *pos;
		while (count > 0 && len > 0) {
			unsigned int chunk;
			int nr_read;

			chunk = min3(count, len, PAGE_SIZE);
			nr_read = access_remote_vm(mm, p, page, chunk, 0);
			if (nr_read < 0)
				rv = nr_read;
			if (nr_read <= 0)
				goto out_free_page;

			if (copy_to_user(buf, page, nr_read)) {
				rv = -EFAULT;
				goto out_free_page;
			}

			p	+= nr_read;
			len	-= nr_read;
			buf	+= nr_read;
			count	-= nr_read;
			rv	+= nr_read;
		}
	} else {
		/*
		 * Command line (1 string) occupies ARGV and maybe
		 * extends into ENVP.
		 */
		if (len1 + len2 <= *pos)
			goto skip_argv_envp;
		if (len1 <= *pos)
			goto skip_argv;

		p = arg_start + *pos;
		len = len1 - *pos;
		while (count > 0 && len > 0) {
			unsigned int chunk, l;
			int nr_read;
			bool final;

			chunk = min3(count, len, PAGE_SIZE);
			nr_read = access_remote_vm(mm, p, page, chunk, 0);
			if (nr_read < 0)
				rv = nr_read;
			if (nr_read <= 0)
				goto out_free_page;

			/*
			 * Command line can be shorter than whole ARGV
			 * even if last "marker" byte says it is not.
			 */
			final = false;
			l = strnlen(page, nr_read);
			if (l < nr_read) {
				nr_read = l;
				final = true;
			}

			if (copy_to_user(buf, page, nr_read)) {
				rv = -EFAULT;
				goto out_free_page;
			}

			p	+= nr_read;
			len	-= nr_read;
			buf	+= nr_read;
			count	-= nr_read;
			rv	+= nr_read;

			if (final)
				goto out_free_page;
		}
skip_argv:
		/*
		 * Command line (1 string) occupies ARGV and
		 * extends into ENVP.
		 */
		if (len1 <= *pos) {
			/* *pos already points into the ENVP part */
			p = env_start + *pos - len1;
			len = len1 + len2 - *pos;
		} else {
			p = env_start;
			len = len2;
		}
		while (count > 0 && len > 0) {
			unsigned int chunk, l;
			int nr_read;
			bool final;

			chunk = min3(count, len, PAGE_SIZE);
			nr_read = access_remote_vm(mm, p, page, chunk, 0);
			if (nr_read < 0)
				rv = nr_read;
			if (nr_read <= 0)
				goto out_free_page;

			/* Find EOS. */
			final = false;
			l = strnlen(page, nr_read);
			if (l < nr_read) {
				nr_read = l;
				final = true;
			}

			if (copy_to_user(buf, page, nr_read)) {
				rv = -EFAULT;
				goto out_free_page;
			}

			p	+= nr_read;
			len	-= nr_read;
			buf	+= nr_read;
			count	-= nr_read;
			rv	+= nr_read;

			if (final)
				goto out_free_page;
		}
skip_argv_envp:
		;
	}

out_free_page:
	free_page((unsigned long)page);
out_mmput:
	mmput(mm);
	if (rv > 0)
		*pos += rv;
	return rv;
}

398
399
400
401
402
/* Seekable, read-only file backed by proc_pid_cmdline_read(). */
static const struct file_operations proc_pid_cmdline_ops = {
	.llseek	= generic_file_llseek,
	.read	= proc_pid_cmdline_read,
};

403
404
static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
405
{
406
	struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
407
	if (mm && !IS_ERR(mm)) {
Linus Torvalds's avatar
Linus Torvalds committed
408
		unsigned int nwords = 0;
Hannes Eder's avatar
Hannes Eder committed
409
		do {
Linus Torvalds's avatar
Linus Torvalds committed
410
			nwords += 2;
Hannes Eder's avatar
Hannes Eder committed
411
		} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
412
		seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
Linus Torvalds's avatar
Linus Torvalds committed
413
		mmput(mm);
414
415
416
		return 0;
	} else
		return PTR_ERR(mm);
Linus Torvalds's avatar
Linus Torvalds committed
417
418
419
420
421
422
423
424
}


#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol.  If that fails, simply return the address.
 */
425
426
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
427
{
Alexey Dobriyan's avatar
Alexey Dobriyan committed
428
	unsigned long wchan;
429
	char symname[KSYM_NAME_LEN];
Linus Torvalds's avatar
Linus Torvalds committed
430
431
432

	wchan = get_wchan(task);

433
434
	if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
			&& !lookup_symbol_name(wchan, symname))
435
		seq_printf(m, "%s", symname);
436
437
	else
		seq_putc(m, '0');
438
439

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
440
441
442
}
#endif /* CONFIG_KALLSYMS */

443
444
445
446
447
static int lock_trace(struct task_struct *task)
{
	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
	if (err)
		return err;
448
	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
449
450
451
452
453
454
455
456
457
458
459
		mutex_unlock(&task->signal->cred_guard_mutex);
		return -EPERM;
	}
	return 0;
}

static void unlock_trace(struct task_struct *task)
{
	mutex_unlock(&task->signal->cred_guard_mutex);
}

Ken Chen's avatar
Ken Chen committed
460
461
462
463
464
465
466
467
468
#ifdef CONFIG_STACKTRACE

#define MAX_STACK_TRACE_DEPTH	64

/*
 * Print the saved stack trace of @task, one "[<address>] symbol" line per
 * frame, at most MAX_STACK_TRACE_DEPTH frames.
 *
 * Returns 0 on success, -ENOMEM if the frame buffer cannot be allocated,
 * or the error from lock_trace() when the caller may not trace the task.
 */
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	struct stack_trace trace;
	unsigned long *frames;
	int ret;
	int i;

	frames = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*frames), GFP_KERNEL);
	if (!frames)
		return -ENOMEM;

	trace.nr_entries	= 0;
	trace.max_entries	= MAX_STACK_TRACE_DEPTH;
	trace.entries		= frames;
	trace.skip		= 0;

	ret = lock_trace(task);
	if (ret)
		goto out_free;

	save_stack_trace_tsk(task, &trace);

	for (i = 0; i < trace.nr_entries; i++) {
		void *addr = (void *)frames[i];

		seq_printf(m, "[<%pK>] %pS\n", addr, addr);
	}
	unlock_trace(task);

out_free:
	kfree(frames);

	return ret;
}
#endif

497
#ifdef CONFIG_SCHED_INFO
Linus Torvalds's avatar
Linus Torvalds committed
498
499
500
/*
 * Provides /proc/PID/schedstat
 */
501
502
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
			      struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
503
{
504
505
506
507
	if (unlikely(!sched_info_on()))
		seq_printf(m, "0 0 0\n");
	else
		seq_printf(m, "%llu %llu %lu\n",
508
509
510
511
512
		   (unsigned long long)task->se.sum_exec_runtime,
		   (unsigned long long)task->sched_info.run_delay,
		   task->sched_info.pcount);

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
513
514
515
}
#endif

Arjan van de Ven's avatar
Arjan van de Ven committed
516
517
518
519
#ifdef CONFIG_LATENCYTOP
static int lstats_show_proc(struct seq_file *m, void *v)
{
	int i;
520
521
	struct inode *inode = m->private;
	struct task_struct *task = get_proc_task(inode);
Arjan van de Ven's avatar
Arjan van de Ven committed
522

523
524
525
	if (!task)
		return -ESRCH;
	seq_puts(m, "Latency Top version : v0.1\n");
Arjan van de Ven's avatar
Arjan van de Ven committed
526
	for (i = 0; i < 32; i++) {
527
528
		struct latency_record *lr = &task->latency_record[i];
		if (lr->backtrace[0]) {
Arjan van de Ven's avatar
Arjan van de Ven committed
529
			int q;
530
531
			seq_printf(m, "%i %li %li",
				   lr->count, lr->time, lr->max);
Arjan van de Ven's avatar
Arjan van de Ven committed
532
			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
533
534
				unsigned long bt = lr->backtrace[q];
				if (!bt)
Arjan van de Ven's avatar
Arjan van de Ven committed
535
					break;
536
				if (bt == ULONG_MAX)
Arjan van de Ven's avatar
Arjan van de Ven committed
537
					break;
538
				seq_printf(m, " %ps", (void *)bt);
Arjan van de Ven's avatar
Arjan van de Ven committed
539
			}
540
			seq_putc(m, '\n');
Arjan van de Ven's avatar
Arjan van de Ven committed
541
542
543
		}

	}
544
	put_task_struct(task);
Arjan van de Ven's avatar
Arjan van de Ven committed
545
546
547
548
549
	return 0;
}

static int lstats_open(struct inode *inode, struct file *file)
{
550
	return single_open(file, lstats_show_proc, inode);
551
552
}

Arjan van de Ven's avatar
Arjan van de Ven committed
553
554
555
/*
 * Write handler: any write clears the task's latency records.
 *
 * The data in @buf is never looked at.  Returns @count so the caller sees
 * the whole write as consumed, or -ESRCH when the task is gone.
 */
static ssize_t lstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	struct task_struct *task;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;

	clear_all_latency_tracing(task);
	put_task_struct(task);

	return count;
}

static const struct file_operations proc_lstats_operations = {
	.open		= lstats_open,
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
571
	.release	= single_release,
Arjan van de Ven's avatar
Arjan van de Ven committed
572
573
574
575
};

#endif

576
577
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
578
{
579
	unsigned long totalpages = totalram_pages + total_swap_pages;
580
	unsigned long points = 0;
Linus Torvalds's avatar
Linus Torvalds committed
581

582
	read_lock(&tasklist_lock);
583
	if (pid_alive(task))
584
585
		points = oom_badness(task, NULL, NULL, totalpages) *
						1000 / totalpages;
586
	read_unlock(&tasklist_lock);
587
588
589
	seq_printf(m, "%lu\n", points);

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
590
591
}

592
/* Human-readable label for one resource limit, as shown by proc_pid_limits(). */
struct limit_names {
	const char *name;	/* description column */
	const char *unit;	/* units column; NULL when the limit has no unit */
};

static const struct limit_names lnames[RLIM_NLIMITS] = {
598
	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
599
600
601
602
603
604
605
606
607
608
609
610
611
612
	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
	[RLIMIT_DATA] = {"Max data size", "bytes"},
	[RLIMIT_STACK] = {"Max stack size", "bytes"},
	[RLIMIT_CORE] = {"Max core file size", "bytes"},
	[RLIMIT_RSS] = {"Max resident set", "bytes"},
	[RLIMIT_NPROC] = {"Max processes", "processes"},
	[RLIMIT_NOFILE] = {"Max open files", "files"},
	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
	[RLIMIT_AS] = {"Max address space", "bytes"},
	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
	[RLIMIT_NICE] = {"Max nice priority", NULL},
	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
613
	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
614
615
616
};

/* Display limits for a process */
617
618
static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
619
620
621
622
623
624
{
	unsigned int i;
	unsigned long flags;

	struct rlimit rlim[RLIM_NLIMITS];

625
	if (!lock_task_sighand(task, &flags))
626
627
628
629
630
631
632
		return 0;
	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
	unlock_task_sighand(task, &flags);

	/*
	 * print the file header
	 */
633
       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
634
		  "Limit", "Soft Limit", "Hard Limit", "Units");
635
636
637

	for (i = 0; i < RLIM_NLIMITS; i++) {
		if (rlim[i].rlim_cur == RLIM_INFINITY)
638
			seq_printf(m, "%-25s %-20s ",
639
				   lnames[i].name, "unlimited");
640
		else
641
			seq_printf(m, "%-25s %-20lu ",
642
				   lnames[i].name, rlim[i].rlim_cur);
643
644

		if (rlim[i].rlim_max == RLIM_INFINITY)
645
			seq_printf(m, "%-20s ", "unlimited");
646
		else
647
			seq_printf(m, "%-20lu ", rlim[i].rlim_max);
648
649

		if (lnames[i].unit)
650
			seq_printf(m, "%-10s\n", lnames[i].unit);
651
		else
652
			seq_putc(m, '\n');
653
654
	}

655
	return 0;
656
657
}

Roland McGrath's avatar
Roland McGrath committed
658
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
659
660
/*
 * Print the state reported by task_current_syscall() for @task.
 *
 * Output is one of:
 *   "running"                    task_current_syscall() returned non-zero
 *   "<nr> <sp> <pc>"             the reported number is negative
 *   "<nr> <6 args> <sp> <pc>"    otherwise
 *
 * Returns 0, or the error from lock_trace() when the caller may not trace
 * the task.
 */
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	unsigned long args[6], sp, pc;
	long nr;
	int err;

	err = lock_trace(task);
	if (err)
		return err;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) {
		seq_puts(m, "running\n");
		goto out_unlock;
	}

	if (nr < 0) {
		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
		goto out_unlock;
	}

	seq_printf(m,
	       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
	       nr,
	       args[0], args[1], args[2], args[3], args[4], args[5],
	       sp, pc);

out_unlock:
	unlock_trace(task);

	return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */

Linus Torvalds's avatar
Linus Torvalds committed
686
687
688
689
690
/************************************************************************/
/*                       Here the fs part begins                        */
/************************************************************************/

/* permission checks */
691
static int proc_fd_access_allowed(struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
692
{
693
694
	struct task_struct *task;
	int allowed = 0;
695
696
697
	/* Allow access to a task's file descriptors if it is us or we
	 * may use ptrace attach to the process and find out that
	 * information.
698
699
	 */
	task = get_proc_task(inode);
700
	if (task) {
701
		allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
702
		put_task_struct(task);
703
	}
704
	return allowed;
Linus Torvalds's avatar
Linus Torvalds committed
705
706
}

707
int proc_setattr(struct dentry *dentry, struct iattr *attr)
708
709
{
	int error;
710
	struct inode *inode = d_inode(dentry);
711
712
713
714
715

	if (attr->ia_valid & ATTR_MODE)
		return -EPERM;

	error = inode_change_ok(inode, attr);
Christoph Hellwig's avatar
Christoph Hellwig committed
716
717
718
719
720
721
	if (error)
		return error;

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
722
723
}

724
725
726
727
728
729
730
731
732
733
734
735
/*
 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
 * or euid/egid (for hide_pid_min=2)?
 */
static bool has_pid_permissions(struct pid_namespace *pid,
				 struct task_struct *task,
				 int hide_pid_min)
{
	if (pid->hide_pid < hide_pid_min)
		return true;
	if (in_group_p(pid->pid_gid))
		return true;
736
	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
737
738
739
740
741
742
743
744
745
746
}


static int proc_pid_permission(struct inode *inode, int mask)
{
	struct pid_namespace *pid = inode->i_sb->s_fs_info;
	struct task_struct *task;
	bool has_perms;

	task = get_proc_task(inode);
747
748
	if (!task)
		return -ESRCH;
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
	has_perms = has_pid_permissions(pid, task, 1);
	put_task_struct(task);

	if (!has_perms) {
		if (pid->hide_pid == 2) {
			/*
			 * Let's make getdents(), stat(), and open()
			 * consistent with each other.  If a process
			 * may not stat() a file, it shouldn't be seen
			 * in procfs at all.
			 */
			return -ENOENT;
		}

		return -EPERM;
	}
	return generic_permission(inode, mask);
}



770
static const struct inode_operations proc_def_inode_operations = {
771
772
773
	.setattr	= proc_setattr,
};

774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
/*
 * seq_file show callback shared by all ONE() entries.
 *
 * Looks up the task for the inode stored in @m->private and forwards to
 * the entry's proc_show callback.  Returns that callback's result, or
 * -ESRCH when the task no longer exists.
 */
static int proc_single_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct pid_namespace *ns = inode->i_sb->s_fs_info;
	struct pid *pid = proc_pid(inode);
	struct task_struct *task;
	int ret;

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;

	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
	put_task_struct(task);

	return ret;
}

static int proc_single_open(struct inode *inode, struct file *filp)
{
Jovi Zhang's avatar
Jovi Zhang committed
796
	return single_open(filp, proc_single_show, inode);
797
798
799
800
801
802
803
804
805
}

/* seq_file based operations used by every ONE() entry. */
static const struct file_operations proc_single_file_operations = {
	.open		= proc_single_open,
	.release	= single_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};

806
807

struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
Linus Torvalds's avatar
Linus Torvalds committed
808
{
809
810
	struct task_struct *task = get_proc_task(inode);
	struct mm_struct *mm = ERR_PTR(-ESRCH);
811

812
	if (task) {
813
		mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
814
		put_task_struct(task);
815

816
817
818
819
820
821
822
823
824
825
826
827
828
829
		if (!IS_ERR_OR_NULL(mm)) {
			/* ensure this mm_struct can't be freed */
			atomic_inc(&mm->mm_count);
			/* but do not pin its memory */
			mmput(mm);
		}
	}

	return mm;
}

/*
 * Common open helper: stash the result of proc_mem_open() in
 * @file->private_data.
 *
 * Returns 0 on success or the errno carried by an ERR_PTR.  A NULL mm is
 * not an error here; it is stored as-is.
 */
static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
	struct mm_struct *mm;

	mm = proc_mem_open(inode, mode);
	if (IS_ERR(mm))
		return PTR_ERR(mm);

	file->private_data = mm;

	return 0;
}

838
839
static int mem_open(struct inode *inode, struct file *file)
{
840
841
842
843
844
845
	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);

	/* OK to pass negative loff_t, we can catch out-of-range */
	file->f_mode |= FMODE_UNSIGNED_OFFSET;

	return ret;
846
847
}

848
849
/*
 * Transfer up to @count bytes between userspace @buf and the target mm
 * stored in @file->private_data, starting at address *@ppos.
 *
 * @write selects the direction (non-zero: @buf -> target mm).  Data moves
 * through a one-page bounce buffer, at most PAGE_SIZE bytes per step.
 *
 * Returns the number of bytes transferred; 0 if there is no mm or the
 * mm has no users left; -ENOMEM if the bounce page cannot be allocated;
 * -EIO if the very first access to the target fails; -EFAULT on a
 * userspace copy fault (this replaces any partial count).  *@ppos is
 * advanced past the bytes transferred.
 */
static ssize_t mem_rw(struct file *file, char __user *buf,
			size_t count, loff_t *ppos, int write)
{
	struct mm_struct *mm = file->private_data;
	unsigned long addr = *ppos;
	ssize_t done = 0;
	char *bounce;

	if (!mm)
		return 0;

	bounce = (char *)__get_free_page(GFP_TEMPORARY);
	if (!bounce)
		return -ENOMEM;

	/* only mm_count is held since open; pin the address space now */
	if (!atomic_inc_not_zero(&mm->mm_users))
		goto out_free;

	while (count > 0) {
		int chunk = min_t(int, count, PAGE_SIZE);

		if (write) {
			if (copy_from_user(bounce, buf, chunk)) {
				done = -EFAULT;
				break;
			}
		}

		chunk = access_remote_vm(mm, addr, bounce, chunk, write);
		if (chunk == 0) {
			/* nothing moved at all: report an I/O error */
			if (done == 0)
				done = -EIO;
			break;
		}

		if (!write) {
			if (copy_to_user(buf, bounce, chunk)) {
				done = -EFAULT;
				break;
			}
		}

		buf += chunk;
		addr += chunk;
		done += chunk;
		count -= chunk;
	}
	*ppos = addr;

	mmput(mm);
out_free:
	free_page((unsigned long) bounce);
	return done;
}

900
901
902
903
904
905
906
907
908
909
910
911
/* Read handler: mem_rw() in the "target mm -> userspace" direction. */
static ssize_t mem_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	const int write = 0;

	return mem_rw(file, buf, count, ppos, write);
}

/*
 * Write handler: mem_rw() in the "userspace -> target mm" direction.
 * mem_rw() takes a non-const buffer for both directions, hence the cast.
 */
static ssize_t mem_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	const int write = 1;

	return mem_rw(file, (char __user*)buf, count, ppos, write);
}

912
/*
 * llseek for address-space files.
 *
 * Only whence values 0 (absolute) and 1 (relative to the current position)
 * are accepted; anything else yields -EINVAL.  No range check is applied
 * to the resulting position, and force_successful_syscall_return() is
 * called before it is returned.
 */
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
	if (orig == 0)
		file->f_pos = offset;
	else if (orig == 1)
		file->f_pos += offset;
	else
		return -EINVAL;

	force_successful_syscall_return();
	return file->f_pos;
}

928
929
930
/*
 * Release handler: drop the mm reference kept in @file->private_data since
 * open, if there is one.  Always returns 0.
 */
static int mem_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (!mm)
		return 0;

	mmdrop(mm);
	return 0;
}

936
static const struct file_operations proc_mem_operations = {
Linus Torvalds's avatar
Linus Torvalds committed
937
938
939
940
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
941
	.release	= mem_release,
Linus Torvalds's avatar
Linus Torvalds committed
942
943
};

944
945
946
947
948
/* Open handler: like mem_open() but only ptrace-read access is required. */
static int environ_open(struct inode *inode, struct file *file)
{
	const unsigned int mode = PTRACE_MODE_READ;

	return __mem_open(inode, file, mode);
}

949
950
951
952
953
static ssize_t environ_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	char *page;
	unsigned long src = *ppos;
954
955
	int ret = 0;
	struct mm_struct *mm = file->private_data;
956
	unsigned long env_start, env_end;
957

958
959
	if (!mm)
		return 0;
960
961
962

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
963
		return -ENOMEM;
964

Al Viro's avatar
Al Viro committed
965
	ret = 0;
966
967
	if (!atomic_inc_not_zero(&mm->mm_users))
		goto free;
968
969
970
971
972
973

	down_read(&mm->mmap_sem);
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

974
	while (count > 0) {
975
976
		size_t this_len, max_len;
		int retval;
977

978
		if (src >= (env_end - env_start))
979
980
			break;

981
		this_len = env_end - (env_start + src);
982
983
984

		max_len = min_t(size_t, PAGE_SIZE, count);
		this_len = min(max_len, this_len);
985

986
		retval = access_remote_vm(mm, (env_start + src),
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
			page, this_len, 0);

		if (retval <= 0) {
			ret = retval;
			break;
		}

		if (copy_to_user(buf, page, retval)) {
			ret = -EFAULT;
			break;
		}

		ret += retval;
		src += retval;
For faster browsing, not all history is shown. View entire blame