/*
 *  Kernel Probes (KProbes)
 *  arch/i386/kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation ( includes contributions from
 *		Rusty Russell).
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */

#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/kdebug.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/uaccess.h>

/* jprobe_return_end is the label defined in the inline asm of jprobe_return() */
void jprobe_return_end(void);

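/* Per-CPU bookkeeping: the kprobe currently being handled and its control block */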
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

/* insert a jmp code */
static __always_inline void set_jmp_op(void *from, void *to)
{
	struct __arch_jmp_op {
		char op;
		long raddr;
	} __attribute__((packed)) *jop;
	jop = (struct __arch_jmp_op *)from;
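	/* the 32-bit displacement is relative to the end of the 5-byte jmp */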
	jop->raddr = (long)(to) - ((long)(from) + 5);
	jop->op = RELATIVEJUMP_INSTRUCTION;
}

/*
 * returns non-zero if opcodes can be boosted.
 */
static __always_inline int can_boost(kprobe_opcode_t *opcodes)
{
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
	/*
	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
	 * Groups, and some special opcodes cannot be boosted.
	 */
	static const unsigned long twobyte_is_boostable[256 / 32] = {
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
		/*      -------------------------------         */
		W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
		W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
		W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
		W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
		W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
		W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
		W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
		W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
		W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
		W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
		W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0)  /* f0 */
		/*      -------------------------------         */
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	};
#undef W
	kprobe_opcode_t opcode;
	kprobe_opcode_t *orig_opcodes = opcodes;
retry:
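	/* give up once we have scanned past the longest possible instruction */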
	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
		return 0;
	opcode = *(opcodes++);

	/* 2nd-byte opcode */
	if (opcode == 0x0f) {
		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
			return 0;
		return test_bit(*opcodes, twobyte_is_boostable);
	}

	switch (opcode & 0xf0) {
	case 0x60:
		if (0x63 < opcode && opcode < 0x67)
			goto retry; /* prefixes */
		/* can't boost Address-size override and bound */
		return (opcode != 0x62 && opcode != 0x67);
	case 0x70:
		return 0; /* can't boost conditional jump */
	case 0xc0:
		/* can't boost software-interruptions */
		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
	case 0xd0:
		/* can boost AA* and XLAT */
		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
	case 0xe0:
		/* can boost in/out and absolute jmps */
		return ((opcode & 0x04) || opcode == 0xea);
	case 0xf0:
		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
			goto retry; /* lock/rep(ne) prefix */
		/* clear and set flags can be boosted */
		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
	default:
		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
			goto retry; /* prefixes */
		/* can't boost CS override and call */
		return (opcode != 0x2e && opcode != 0x9a);
	}
}

/*
 * returns non-zero if opcode modifies the interrupt flag.
 */
static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
{
	switch (opcode) {
	case 0xfa:		/* cli */
	case 0xfb:		/* sti */
	case 0xcf:		/* iret/iretd */
	case 0x9d:		/* popf/popfd */
		return 1;
	}
	return 0;
}

int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
	/* insn: must be on special executable page on i386. */
	p->ainsn.insn = get_insn_slot();
	if (!p->ainsn.insn)
		return -ENOMEM;

	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
	p->opcode = *p->addr;
	if (can_boost(p->addr)) {
		p->ainsn.boostable = 0;
	} else {
		p->ainsn.boostable = -1;
	}
	return 0;
}

void __kprobes arch_arm_kprobe(struct kprobe *p)
{
	*p->addr = BREAKPOINT_INSTRUCTION;
	flush_icache_range((unsigned long) p->addr,
			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
}

void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
	*p->addr = p->opcode;
	flush_icache_range((unsigned long) p->addr,
			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
}

void __kprobes arch_remove_kprobe(struct kprobe *p)
{
	mutex_lock(&kprobe_mutex);
	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
	mutex_unlock(&kprobe_mutex);
}

static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	kcb->prev_kprobe.kp = kprobe_running();
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
}

static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
	kcb->kprobe_status = kcb->prev_kprobe.status;
	kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
	kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
}

static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
				struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = p;
	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
		= (regs->eflags & (TF_MASK | IF_MASK));
	if (is_IF_modifier(p->opcode))
		kcb->kprobe_saved_eflags &= ~IF_MASK;
}

static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
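	/* single-step the instruction with the trap flag set and interrupts disabled */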
	regs->eflags |= TF_MASK;
	regs->eflags &= ~IF_MASK;
	/*single step inline if the instruction is an int3*/
	if (p->opcode == BREAKPOINT_INSTRUCTION)
		regs->eip = (unsigned long)p->addr;
	else
		regs->eip = (unsigned long)p->ainsn.insn;
}

/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
				      struct pt_regs *regs)
{
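	/* on i386, esp/ss are not saved for traps taken in kernel mode, so
	 * &regs->esp is the stack pointer at the probed function entry, i.e.
	 * it points at the caller's return address */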
	unsigned long *sara = (unsigned long *)&regs->esp;

	struct kretprobe_instance *ri;

	if ((ri = get_free_rp_inst(rp)) != NULL) {
		ri->rp = rp;
		ri->task = current;
		ri->ret_addr = (kprobe_opcode_t *) *sara;

		/* Replace the return addr with trampoline addr */
		*sara = (unsigned long) &kretprobe_trampoline;
		add_rp_inst(ri);
	} else {
		rp->nmissed++;
	}
}

/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled throughout this function.
 */
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *p;
	int ret = 0;
	kprobe_opcode_t *addr;
	struct kprobe_ctlblk *kcb;

	addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));

	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();

	/* Check we're not actually recursing */
	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
			if (kcb->kprobe_status == KPROBE_HIT_SS &&
				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
				regs->eflags &= ~TF_MASK;
				regs->eflags |= kcb->kprobe_saved_eflags;
				goto no_kprobe;
			}
			/* We have reentered the kprobe_handler(), since
			 * another probe was hit while within the handler.
			 * Save the original kprobe variables, then just
			 * single-step the instruction of the new probe
			 * without calling any user handlers.
			 */
			save_previous_kprobe(kcb);
			set_current_kprobe(p, regs, kcb);
			kprobes_inc_nmissed_count(p);
			prepare_singlestep(p, regs);
			kcb->kprobe_status = KPROBE_REENTER;
			return 1;
		} else {
			if (*addr != BREAKPOINT_INSTRUCTION) {
			/* The breakpoint instruction was removed by
			 * another cpu right after we hit it; no further
			 * handling of this interrupt is appropriate.
			 */
				regs->eip -= sizeof(kprobe_opcode_t);
				ret = 1;
				goto no_kprobe;
			}
			p = __get_cpu_var(current_kprobe);
			if (p->break_handler && p->break_handler(p, regs)) {
				goto ss_probe;
			}
		}
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
			 * Back up over the (now missing) int3 and run
			 * the original instruction.
			 */
			regs->eip -= sizeof(kprobe_opcode_t);
			ret = 1;
		}
		/* Not one of ours: let kernel handle it */
		goto no_kprobe;
	}

	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;

	if (p->pre_handler && p->pre_handler(p, regs))
		/* handler has already set things up, so skip ss setup */
		return 1;

ss_probe:
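	/* Boosting executes the copied instruction straight out of the slot and
	 * jumps back without another trap; it is avoided when preemption could
	 * leave a task running on an instruction slot that is later freed. */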
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
	if (p->ainsn.boostable == 1 && !p->post_handler){
		/* Boost up -- we can execute copied instructions directly */
		reset_current_kprobe();
		regs->eip = (unsigned long)p->ainsn.insn;
		preempt_enable_no_resched();
		return 1;
	}
#endif
	prepare_singlestep(p, regs);
	kcb->kprobe_status = KPROBE_HIT_SS;
	return 1;

no_kprobe:
	preempt_enable_no_resched();
	return ret;
}

/*
 * For function-return probes, arch_prepare_kretprobe() replaces the return
 * address with the address of kretprobe_trampoline below.  When a retprobed
 * function returns, control lands on the trampoline, which builds a fake
 * pt_regs frame and calls trampoline_handler() to run the kretprobe's
 * handler and recover the real return address.
 */
 void __kprobes kretprobe_trampoline_holder(void)
 {
	asm volatile ( ".global kretprobe_trampoline\n"
			"kretprobe_trampoline: \n"
			"	pushf\n"
			/* skip cs, eip, orig_eax */
			"	subl $12, %esp\n"
			"	pushl %fs\n"
			"	pushl %ds\n"
			"	pushl %es\n"
			"	pushl %eax\n"
			"	pushl %ebp\n"
			"	pushl %edi\n"
			"	pushl %esi\n"
			"	pushl %edx\n"
			"	pushl %ecx\n"
			"	pushl %ebx\n"
			"	movl %esp, %eax\n"
			"	call trampoline_handler\n"
			/* move eflags to cs */
			"	movl 52(%esp), %edx\n"
			"	movl %edx, 48(%esp)\n"
			/* save true return address on eflags */
			"	movl %eax, 52(%esp)\n"
			"	popl %ebx\n"
			"	popl %ecx\n"
			"	popl %edx\n"
			"	popl %esi\n"
			"	popl %edi\n"
			"	popl %ebp\n"
			"	popl %eax\n"
			/* skip eip, orig_eax, es, ds, fs */
			"	addl $20, %esp\n"
			"	popf\n"
			"	ret\n");
}

/*
 * Called from kretprobe_trampoline
 */
fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
{
	struct kretprobe_instance *ri = NULL;
	struct hlist_head *head, empty_rp;
	struct hlist_node *node, *tmp;
	unsigned long flags, orig_ret_address = 0;
	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	/* fixup registers */
	regs->xcs = __KERNEL_CS | get_kernel_rpl();
	regs->eip = trampoline_address;
	regs->orig_eax = 0xffffffff;

	/*
	 * It is possible to have multiple instances associated with a given
	 * task either because multiple functions in the call path
	 * have a return probe installed on them, and/or more than one
	 * return probe was registered for a target function.
	 *
	 * We can handle this because:
	 *     - instances are always inserted at the head of the list
	 *     - when multiple return probes are registered for the same
	 *       function, the first instance's ret_addr will point to the
	 *       real return address, and all the rest will point to
	 *       kretprobe_trampoline
	 */
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
		if (ri->task != current)
			/* another task is sharing our hash bucket */
			continue;

		if (ri->rp && ri->rp->handler){
			__get_cpu_var(current_kprobe) = &ri->rp->kp;
			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
			ri->rp->handler(ri, regs);
			__get_cpu_var(current_kprobe) = NULL;
		}

		orig_ret_address = (unsigned long)ri->ret_addr;
		recycle_rp_inst(ri, &empty_rp);

		if (orig_ret_address != trampoline_address)
			/*
			 * This is the real return address. Any other
			 * instances associated with this task are for
			 * other calls deeper on the call stack
			 */
			break;
	}

	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));

	spin_unlock_irqrestore(&kretprobe_lock, flags);

	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}
	return (void*)orig_ret_address;
}

/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction whose first byte has been replaced by the "int 3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Except in the case of absolute or indirect jump or call instructions,
 * the new eip is relative to the copied instruction.  We need to make
 * it relative to the original instruction.
 *
 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 * flags are set in the just-pushed eflags, and may need to be cleared.
 *
 * 2) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
 *
 * This function also checks instruction size for preparing direct execution.
 */
static void __kprobes resume_execution(struct kprobe *p,
		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
	unsigned long *tos = (unsigned long *)&regs->esp;
	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
	unsigned long orig_eip = (unsigned long)p->addr;

	regs->eflags &= ~TF_MASK;
	switch (p->ainsn.insn[0]) {
	case 0x9c:		/* pushfl */
		*tos &= ~(TF_MASK | IF_MASK);
		*tos |= kcb->kprobe_old_eflags;
		break;
	case 0xc2:		/* iret/ret/lret */
	case 0xc3:
	case 0xca:
	case 0xcb:
	case 0xcf:
	case 0xea:		/* jmp absolute -- eip is correct */
		/* eip is already adjusted, no more changes required */
		p->ainsn.boostable = 1;
		goto no_change;
	case 0xe8:		/* call relative - Fix return addr */
		*tos = orig_eip + (*tos - copy_eip);
		break;
	case 0x9a:		/* call absolute -- same as call absolute, indirect */
		*tos = orig_eip + (*tos - copy_eip);
		goto no_change;
	case 0xff:
		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
			/*
			 * call absolute, indirect
			 * Fix return addr; eip is correct.
			 * But this is not boostable
			 */
			*tos = orig_eip + (*tos - copy_eip);
			goto no_change;
		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
			/* eip is correct. And this is boostable */
			p->ainsn.boostable = 1;
			goto no_change;
		}
	default:
		break;
	}

	if (p->ainsn.boostable == 0) {
		if ((regs->eip > copy_eip) &&
		    (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
			/*
			 * This instruction can be executed directly if it
			 * jumps back to the correct address.
			 */
			set_jmp_op((void *)regs->eip,
				   (void *)orig_eip + (regs->eip - copy_eip));
			p->ainsn.boostable = 1;
		} else {
			p->ainsn.boostable = -1;
		}
	}

	regs->eip = orig_eip + (regs->eip - copy_eip);

no_change:
	return;
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
 * remain disabled throughout this function.
 */
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (!cur)
		return 0;

	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
	}

	resume_execution(cur, regs, kcb);
	regs->eflags |= kcb->kprobe_saved_eflags;

	/*Restore back the original saved kprobes variables and continue. */
	if (kcb->kprobe_status == KPROBE_REENTER) {
		restore_previous_kprobe(kcb);
		goto out;
	}
	reset_current_kprobe();
out:
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, eflags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (regs->eflags & TF_MASK)
		return 0;

	return 1;
}

static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	switch(kcb->kprobe_status) {
	case KPROBE_HIT_SS:
	case KPROBE_REENTER:
		/*
		 * We are here because the instruction being single
		 * stepped caused a page fault. We reset the current
		 * kprobe, point eip back to the probe address, and
		 * allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->eip = (unsigned long)cur->addr;
		regs->eflags |= kcb->kprobe_old_eflags;
		if (kcb->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(kcb);
		else
			reset_current_kprobe();
		preempt_enable_no_resched();
		break;
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SSDONE:
		/*
		 * We increment the nmissed count for accounting;
		 * we could also use the npre/npostfault counts to
		 * account for these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because an instruction in the pre/post
		 * handler caused the page fault.  This could happen if
		 * the handler tries to access user space via
		 * copy_from_user(), get_user(), etc.  Let the
		 * user-specified fault handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;

		/*
		 * In case the user-specified fault handler returned
		 * zero, try to fix up.
		 */
		if (fixup_exception(regs))
			return 1;

		/*
		 * fixup_exception() could not handle it;
		 * let do_page_fault() fix it.
		 */
		break;
	default:
		break;
	}
	return 0;
}

/*
 * Wrapper routine for handling exceptions.
 */
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
				       unsigned long val, void *data)
{
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;

	if (args->regs && user_mode_vm(args->regs))
		return ret;

	switch (val) {
	case DIE_INT3:
		if (kprobe_handler(args->regs))
			ret = NOTIFY_STOP;
		break;
	case DIE_DEBUG:
		if (post_kprobe_handler(args->regs))
			ret = NOTIFY_STOP;
		break;
	case DIE_GPF:
	case DIE_PAGE_FAULT:
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
		if (kprobe_running() &&
		    kprobe_fault_handler(args->regs, args->trapnr))
			ret = NOTIFY_STOP;
		preempt_enable();
		break;
	default:
		break;
	}
	return ret;
}

int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	unsigned long addr;
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	kcb->jprobe_saved_regs = *regs;
	kcb->jprobe_saved_esp = &regs->esp;
	addr = (unsigned long)(kcb->jprobe_saved_esp);

	/*
	 * TBD: As Linus pointed out, gcc assumes that the callee
	 * owns the argument space and could overwrite it, e.g.
	 * tailcall optimization. So, to be absolutely safe
	 * we also save and restore enough stack bytes to cover
	 * the argument area.
	 */
	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
			MIN_STACK_SIZE(addr));
	regs->eflags &= ~IF_MASK;
	regs->eip = (unsigned long)(jp->entry);
	return 1;
}

void __kprobes jprobe_return(void)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	asm volatile ("       xchgl   %%ebx,%%esp     \n"
		      "       int3			\n"
		      "       .globl jprobe_return_end	\n"
		      "       jprobe_return_end:	\n"
		      "       nop			\n"::"b"
		      (kcb->jprobe_saved_esp):"memory");
}

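/*
 * Called as the jprobe break_handler when the int3 placed by jprobe_return()
 * is hit: verify the stack pointer, then restore the registers and stack
 * bytes saved by setjmp_pre_handler().
 */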
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	u8 *addr = (u8 *) (regs->eip - 1);
	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
	struct jprobe *jp = container_of(p, struct jprobe, kp);

	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
		if (&regs->esp != kcb->jprobe_saved_esp) {
			struct pt_regs *saved_regs =
			    container_of(kcb->jprobe_saved_esp,
					    struct pt_regs, esp);
			printk("current esp %p does not match saved esp %p\n",
			       &regs->esp, kcb->jprobe_saved_esp);
			printk("Saved registers for jprobe %p\n", jp);
			show_registers(saved_regs);
			printk("Current registers\n");
			show_registers(regs);
			BUG();
		}
		*regs = kcb->jprobe_saved_regs;
		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
		       MIN_STACK_SIZE(stack_addr));
		preempt_enable_no_resched();
		return 1;
	}
	return 0;
}

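/* No arch-specific initialization is required on i386. */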
int __init arch_init_kprobes(void)
{
	return 0;
}