xref: /linux/arch/x86/kernel/ptrace.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /* By Ross Biro 1/23/92 */
2 /*
3  * Pentium III FXSR, SSE support
4  *	Gareth Hughes <gareth@valinux.com>, May 2000
5  */
6 
7 #include <linux/kernel.h>
8 #include <linux/sched.h>
9 #include <linux/mm.h>
10 #include <linux/smp.h>
11 #include <linux/errno.h>
12 #include <linux/slab.h>
13 #include <linux/ptrace.h>
14 #include <linux/tracehook.h>
15 #include <linux/user.h>
16 #include <linux/elf.h>
17 #include <linux/security.h>
18 #include <linux/audit.h>
19 #include <linux/seccomp.h>
20 #include <linux/signal.h>
21 #include <linux/perf_event.h>
22 #include <linux/hw_breakpoint.h>
23 #include <linux/rcupdate.h>
24 #include <linux/export.h>
25 #include <linux/context_tracking.h>
26 
27 #include <asm/uaccess.h>
28 #include <asm/pgtable.h>
29 #include <asm/processor.h>
30 #include <asm/fpu/internal.h>
31 #include <asm/fpu/signal.h>
32 #include <asm/fpu/regset.h>
33 #include <asm/debugreg.h>
34 #include <asm/ldt.h>
35 #include <asm/desc.h>
36 #include <asm/prctl.h>
37 #include <asm/proto.h>
38 #include <asm/hw_breakpoint.h>
39 #include <asm/traps.h>
40 
41 #include "tls.h"
42 
43 #define CREATE_TRACE_POINTS
44 #include <trace/events/syscalls.h>
45 
/*
 * Numbers of the x86 register sets. These are the set numbers handed to
 * copy_regset_to_user()/copy_regset_from_user() further down in this file.
 * REGSET_IOPERM64 is an alias sharing the value of REGSET_XFP.
 */
enum x86_regset {
	REGSET_GENERAL = 0,
	REGSET_FP,
	REGSET_XFP,
	REGSET_XSTATE,
	REGSET_TLS,
	REGSET_IOPERM32,

	/* Alias: same slot number as REGSET_XFP. */
	REGSET_IOPERM64 = REGSET_XFP,
};
55 
/* One row of a register-name <-> struct pt_regs offset lookup table. */
struct pt_regs_offset {
	const char *name;	/* register name; NULL in the terminating row */
	int offset;		/* byte offset of the member in struct pt_regs */
};

/* Table row for pt_regs member @r; the name is the member's spelling. */
#define REG_OFFSET_NAME(r)						\
	{ .offset = offsetof(struct pt_regs, r), .name = #r }

/* Terminating row: the NULL name is what the lookup loops stop on. */
#define REG_OFFSET_END							\
	{ .offset = 0, .name = NULL }
63 
64 static const struct pt_regs_offset regoffset_table[] = {
65 #ifdef CONFIG_X86_64
66 	REG_OFFSET_NAME(r15),
67 	REG_OFFSET_NAME(r14),
68 	REG_OFFSET_NAME(r13),
69 	REG_OFFSET_NAME(r12),
70 	REG_OFFSET_NAME(r11),
71 	REG_OFFSET_NAME(r10),
72 	REG_OFFSET_NAME(r9),
73 	REG_OFFSET_NAME(r8),
74 #endif
75 	REG_OFFSET_NAME(bx),
76 	REG_OFFSET_NAME(cx),
77 	REG_OFFSET_NAME(dx),
78 	REG_OFFSET_NAME(si),
79 	REG_OFFSET_NAME(di),
80 	REG_OFFSET_NAME(bp),
81 	REG_OFFSET_NAME(ax),
82 #ifdef CONFIG_X86_32
83 	REG_OFFSET_NAME(ds),
84 	REG_OFFSET_NAME(es),
85 	REG_OFFSET_NAME(fs),
86 	REG_OFFSET_NAME(gs),
87 #endif
88 	REG_OFFSET_NAME(orig_ax),
89 	REG_OFFSET_NAME(ip),
90 	REG_OFFSET_NAME(cs),
91 	REG_OFFSET_NAME(flags),
92 	REG_OFFSET_NAME(sp),
93 	REG_OFFSET_NAME(ss),
94 	REG_OFFSET_END,
95 };
96 
97 /**
98  * regs_query_register_offset() - query register offset from its name
99  * @name:	the name of a register
100  *
101  * regs_query_register_offset() returns the offset of a register in struct
102  * pt_regs from its name. If the name is invalid, this returns -EINVAL;
103  */
104 int regs_query_register_offset(const char *name)
105 {
106 	const struct pt_regs_offset *roff;
107 	for (roff = regoffset_table; roff->name != NULL; roff++)
108 		if (!strcmp(roff->name, name))
109 			return roff->offset;
110 	return -EINVAL;
111 }
112 
113 /**
114  * regs_query_register_name() - query register name from its offset
115  * @offset:	the offset of a register in struct pt_regs.
116  *
117  * regs_query_register_name() returns the name of a register from its
118  * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
119  */
120 const char *regs_query_register_name(unsigned int offset)
121 {
122 	const struct pt_regs_offset *roff;
123 	for (roff = regoffset_table; roff->name != NULL; roff++)
124 		if (roff->offset == offset)
125 			return roff->name;
126 	return NULL;
127 }
128 
129 static const int arg_offs_table[] = {
130 #ifdef CONFIG_X86_32
131 	[0] = offsetof(struct pt_regs, ax),
132 	[1] = offsetof(struct pt_regs, dx),
133 	[2] = offsetof(struct pt_regs, cx)
134 #else /* CONFIG_X86_64 */
135 	[0] = offsetof(struct pt_regs, di),
136 	[1] = offsetof(struct pt_regs, si),
137 	[2] = offsetof(struct pt_regs, dx),
138 	[3] = offsetof(struct pt_regs, cx),
139 	[4] = offsetof(struct pt_regs, r8),
140 	[5] = offsetof(struct pt_regs, r9)
141 #endif
142 };
143 
/*
 * Does not yet catch signals sent when the child dies
 * (in exit.c or in signal.c).
 */
148 
/*
 * Mask of the EFLAGS bits the user has access to: a set bit grants access
 * to that flag, a clear bit denies it.
 */
#define FLAG_MASK_32							\
	((unsigned long)(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
			 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | \
			 X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_RF | \
			 X86_EFLAGS_AC))
158 
159 /*
160  * Determines whether a value may be installed in a segment register.
161  */
162 static inline bool invalid_selector(u16 value)
163 {
164 	return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL);
165 }
166 
167 #ifdef CONFIG_X86_32
168 
169 #define FLAG_MASK		FLAG_MASK_32
170 
171 /*
172  * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
173  * when it traps.  The previous stack will be directly underneath the saved
174  * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
175  *
176  * Now, if the stack is empty, '&regs->sp' is out of range. In this
177  * case we try to take the previous stack. To always return a non-null
178  * stack pointer we fall back to regs as stack if no previous stack
179  * exists.
180  *
181  * This is valid only for kernel mode traps.
182  */
183 unsigned long kernel_stack_pointer(struct pt_regs *regs)
184 {
185 	unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
186 	unsigned long sp = (unsigned long)&regs->sp;
187 	u32 *prev_esp;
188 
189 	if (context == (sp & ~(THREAD_SIZE - 1)))
190 		return sp;
191 
192 	prev_esp = (u32 *)(context);
193 	if (prev_esp)
194 		return (unsigned long)prev_esp;
195 
196 	return (unsigned long)regs;
197 }
198 EXPORT_SYMBOL_GPL(kernel_stack_pointer);
199 
200 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
201 {
202 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
203 	return &regs->bx + (regno >> 2);
204 }
205 
206 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
207 {
208 	/*
209 	 * Returning the value truncates it to 16 bits.
210 	 */
211 	unsigned int retval;
212 	if (offset != offsetof(struct user_regs_struct, gs))
213 		retval = *pt_regs_access(task_pt_regs(task), offset);
214 	else {
215 		if (task == current)
216 			retval = get_user_gs(task_pt_regs(task));
217 		else
218 			retval = task_user_gs(task);
219 	}
220 	return retval;
221 }
222 
223 static int set_segment_reg(struct task_struct *task,
224 			   unsigned long offset, u16 value)
225 {
226 	/*
227 	 * The value argument was already truncated to 16 bits.
228 	 */
229 	if (invalid_selector(value))
230 		return -EIO;
231 
232 	/*
233 	 * For %cs and %ss we cannot permit a null selector.
234 	 * We can permit a bogus selector as long as it has USER_RPL.
235 	 * Null selectors are fine for other segment registers, but
236 	 * we will never get back to user mode with invalid %cs or %ss
237 	 * and will take the trap in iret instead.  Much code relies
238 	 * on user_mode() to distinguish a user trap frame (which can
239 	 * safely use invalid selectors) from a kernel trap frame.
240 	 */
241 	switch (offset) {
242 	case offsetof(struct user_regs_struct, cs):
243 	case offsetof(struct user_regs_struct, ss):
244 		if (unlikely(value == 0))
245 			return -EIO;
246 
247 	default:
248 		*pt_regs_access(task_pt_regs(task), offset) = value;
249 		break;
250 
251 	case offsetof(struct user_regs_struct, gs):
252 		if (task == current)
253 			set_user_gs(task_pt_regs(task), value);
254 		else
255 			task_user_gs(task) = value;
256 	}
257 
258 	return 0;
259 }
260 
261 #else  /* CONFIG_X86_64 */
262 
263 #define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
264 
265 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
266 {
267 	BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
268 	return &regs->r15 + (offset / sizeof(regs->r15));
269 }
270 
271 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
272 {
273 	/*
274 	 * Returning the value truncates it to 16 bits.
275 	 */
276 	unsigned int seg;
277 
278 	switch (offset) {
279 	case offsetof(struct user_regs_struct, fs):
280 		if (task == current) {
281 			/* Older gas can't assemble movq %?s,%r?? */
282 			asm("movl %%fs,%0" : "=r" (seg));
283 			return seg;
284 		}
285 		return task->thread.fsindex;
286 	case offsetof(struct user_regs_struct, gs):
287 		if (task == current) {
288 			asm("movl %%gs,%0" : "=r" (seg));
289 			return seg;
290 		}
291 		return task->thread.gsindex;
292 	case offsetof(struct user_regs_struct, ds):
293 		if (task == current) {
294 			asm("movl %%ds,%0" : "=r" (seg));
295 			return seg;
296 		}
297 		return task->thread.ds;
298 	case offsetof(struct user_regs_struct, es):
299 		if (task == current) {
300 			asm("movl %%es,%0" : "=r" (seg));
301 			return seg;
302 		}
303 		return task->thread.es;
304 
305 	case offsetof(struct user_regs_struct, cs):
306 	case offsetof(struct user_regs_struct, ss):
307 		break;
308 	}
309 	return *pt_regs_access(task_pt_regs(task), offset);
310 }
311 
312 static int set_segment_reg(struct task_struct *task,
313 			   unsigned long offset, u16 value)
314 {
315 	/*
316 	 * The value argument was already truncated to 16 bits.
317 	 */
318 	if (invalid_selector(value))
319 		return -EIO;
320 
321 	switch (offset) {
322 	case offsetof(struct user_regs_struct,fs):
323 		/*
324 		 * If this is setting fs as for normal 64-bit use but
325 		 * setting fs_base has implicitly changed it, leave it.
326 		 */
327 		if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
328 		     task->thread.fs != 0) ||
329 		    (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
330 		     task->thread.fs == 0))
331 			break;
332 		task->thread.fsindex = value;
333 		if (task == current)
334 			loadsegment(fs, task->thread.fsindex);
335 		break;
336 	case offsetof(struct user_regs_struct,gs):
337 		/*
338 		 * If this is setting gs as for normal 64-bit use but
339 		 * setting gs_base has implicitly changed it, leave it.
340 		 */
341 		if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
342 		     task->thread.gs != 0) ||
343 		    (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
344 		     task->thread.gs == 0))
345 			break;
346 		task->thread.gsindex = value;
347 		if (task == current)
348 			load_gs_index(task->thread.gsindex);
349 		break;
350 	case offsetof(struct user_regs_struct,ds):
351 		task->thread.ds = value;
352 		if (task == current)
353 			loadsegment(ds, task->thread.ds);
354 		break;
355 	case offsetof(struct user_regs_struct,es):
356 		task->thread.es = value;
357 		if (task == current)
358 			loadsegment(es, task->thread.es);
359 		break;
360 
361 		/*
362 		 * Can't actually change these in 64-bit mode.
363 		 */
364 	case offsetof(struct user_regs_struct,cs):
365 		if (unlikely(value == 0))
366 			return -EIO;
367 		task_pt_regs(task)->cs = value;
368 		break;
369 	case offsetof(struct user_regs_struct,ss):
370 		if (unlikely(value == 0))
371 			return -EIO;
372 		task_pt_regs(task)->ss = value;
373 		break;
374 	}
375 
376 	return 0;
377 }
378 
379 #endif	/* CONFIG_X86_32 */
380 
381 static unsigned long get_flags(struct task_struct *task)
382 {
383 	unsigned long retval = task_pt_regs(task)->flags;
384 
385 	/*
386 	 * If the debugger set TF, hide it from the readout.
387 	 */
388 	if (test_tsk_thread_flag(task, TIF_FORCED_TF))
389 		retval &= ~X86_EFLAGS_TF;
390 
391 	return retval;
392 }
393 
394 static int set_flags(struct task_struct *task, unsigned long value)
395 {
396 	struct pt_regs *regs = task_pt_regs(task);
397 
398 	/*
399 	 * If the user value contains TF, mark that
400 	 * it was not "us" (the debugger) that set it.
401 	 * If not, make sure it stays set if we had.
402 	 */
403 	if (value & X86_EFLAGS_TF)
404 		clear_tsk_thread_flag(task, TIF_FORCED_TF);
405 	else if (test_tsk_thread_flag(task, TIF_FORCED_TF))
406 		value |= X86_EFLAGS_TF;
407 
408 	regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);
409 
410 	return 0;
411 }
412 
413 static int putreg(struct task_struct *child,
414 		  unsigned long offset, unsigned long value)
415 {
416 	switch (offset) {
417 	case offsetof(struct user_regs_struct, cs):
418 	case offsetof(struct user_regs_struct, ds):
419 	case offsetof(struct user_regs_struct, es):
420 	case offsetof(struct user_regs_struct, fs):
421 	case offsetof(struct user_regs_struct, gs):
422 	case offsetof(struct user_regs_struct, ss):
423 		return set_segment_reg(child, offset, value);
424 
425 	case offsetof(struct user_regs_struct, flags):
426 		return set_flags(child, value);
427 
428 #ifdef CONFIG_X86_64
429 	case offsetof(struct user_regs_struct,fs_base):
430 		if (value >= TASK_SIZE_OF(child))
431 			return -EIO;
432 		/*
433 		 * When changing the segment base, use do_arch_prctl
434 		 * to set either thread.fs or thread.fsindex and the
435 		 * corresponding GDT slot.
436 		 */
437 		if (child->thread.fs != value)
438 			return do_arch_prctl(child, ARCH_SET_FS, value);
439 		return 0;
440 	case offsetof(struct user_regs_struct,gs_base):
441 		/*
442 		 * Exactly the same here as the %fs handling above.
443 		 */
444 		if (value >= TASK_SIZE_OF(child))
445 			return -EIO;
446 		if (child->thread.gs != value)
447 			return do_arch_prctl(child, ARCH_SET_GS, value);
448 		return 0;
449 #endif
450 	}
451 
452 	*pt_regs_access(task_pt_regs(child), offset) = value;
453 	return 0;
454 }
455 
456 static unsigned long getreg(struct task_struct *task, unsigned long offset)
457 {
458 	switch (offset) {
459 	case offsetof(struct user_regs_struct, cs):
460 	case offsetof(struct user_regs_struct, ds):
461 	case offsetof(struct user_regs_struct, es):
462 	case offsetof(struct user_regs_struct, fs):
463 	case offsetof(struct user_regs_struct, gs):
464 	case offsetof(struct user_regs_struct, ss):
465 		return get_segment_reg(task, offset);
466 
467 	case offsetof(struct user_regs_struct, flags):
468 		return get_flags(task);
469 
470 #ifdef CONFIG_X86_64
471 	case offsetof(struct user_regs_struct, fs_base): {
472 		/*
473 		 * do_arch_prctl may have used a GDT slot instead of
474 		 * the MSR.  To userland, it appears the same either
475 		 * way, except the %fs segment selector might not be 0.
476 		 */
477 		unsigned int seg = task->thread.fsindex;
478 		if (task->thread.fs != 0)
479 			return task->thread.fs;
480 		if (task == current)
481 			asm("movl %%fs,%0" : "=r" (seg));
482 		if (seg != FS_TLS_SEL)
483 			return 0;
484 		return get_desc_base(&task->thread.tls_array[FS_TLS]);
485 	}
486 	case offsetof(struct user_regs_struct, gs_base): {
487 		/*
488 		 * Exactly the same here as the %fs handling above.
489 		 */
490 		unsigned int seg = task->thread.gsindex;
491 		if (task->thread.gs != 0)
492 			return task->thread.gs;
493 		if (task == current)
494 			asm("movl %%gs,%0" : "=r" (seg));
495 		if (seg != GS_TLS_SEL)
496 			return 0;
497 		return get_desc_base(&task->thread.tls_array[GS_TLS]);
498 	}
499 #endif
500 	}
501 
502 	return *pt_regs_access(task_pt_regs(task), offset);
503 }
504 
505 static int genregs_get(struct task_struct *target,
506 		       const struct user_regset *regset,
507 		       unsigned int pos, unsigned int count,
508 		       void *kbuf, void __user *ubuf)
509 {
510 	if (kbuf) {
511 		unsigned long *k = kbuf;
512 		while (count >= sizeof(*k)) {
513 			*k++ = getreg(target, pos);
514 			count -= sizeof(*k);
515 			pos += sizeof(*k);
516 		}
517 	} else {
518 		unsigned long __user *u = ubuf;
519 		while (count >= sizeof(*u)) {
520 			if (__put_user(getreg(target, pos), u++))
521 				return -EFAULT;
522 			count -= sizeof(*u);
523 			pos += sizeof(*u);
524 		}
525 	}
526 
527 	return 0;
528 }
529 
530 static int genregs_set(struct task_struct *target,
531 		       const struct user_regset *regset,
532 		       unsigned int pos, unsigned int count,
533 		       const void *kbuf, const void __user *ubuf)
534 {
535 	int ret = 0;
536 	if (kbuf) {
537 		const unsigned long *k = kbuf;
538 		while (count >= sizeof(*k) && !ret) {
539 			ret = putreg(target, pos, *k++);
540 			count -= sizeof(*k);
541 			pos += sizeof(*k);
542 		}
543 	} else {
544 		const unsigned long  __user *u = ubuf;
545 		while (count >= sizeof(*u) && !ret) {
546 			unsigned long word;
547 			ret = __get_user(word, u++);
548 			if (ret)
549 				break;
550 			ret = putreg(target, pos, word);
551 			count -= sizeof(*u);
552 			pos += sizeof(*u);
553 		}
554 	}
555 	return ret;
556 }
557 
558 static void ptrace_triggered(struct perf_event *bp,
559 			     struct perf_sample_data *data,
560 			     struct pt_regs *regs)
561 {
562 	int i;
563 	struct thread_struct *thread = &(current->thread);
564 
565 	/*
566 	 * Store in the virtual DR6 register the fact that the breakpoint
567 	 * was hit so the thread's debugger will see it.
568 	 */
569 	for (i = 0; i < HBP_NUM; i++) {
570 		if (thread->ptrace_bps[i] == bp)
571 			break;
572 	}
573 
574 	thread->debugreg6 |= (DR_TRAP0 << i);
575 }
576 
577 /*
578  * Walk through every ptrace breakpoints for this thread and
579  * build the dr7 value on top of their attributes.
580  *
581  */
582 static unsigned long ptrace_get_dr7(struct perf_event *bp[])
583 {
584 	int i;
585 	int dr7 = 0;
586 	struct arch_hw_breakpoint *info;
587 
588 	for (i = 0; i < HBP_NUM; i++) {
589 		if (bp[i] && !bp[i]->attr.disabled) {
590 			info = counter_arch_bp(bp[i]);
591 			dr7 |= encode_dr7(i, info->len, info->type);
592 		}
593 	}
594 
595 	return dr7;
596 }
597 
598 static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
599 					int len, int type, bool disabled)
600 {
601 	int err, bp_len, bp_type;
602 
603 	err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
604 	if (!err) {
605 		attr->bp_len = bp_len;
606 		attr->bp_type = bp_type;
607 		attr->disabled = disabled;
608 	}
609 
610 	return err;
611 }
612 
613 static struct perf_event *
614 ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
615 				unsigned long addr, bool disabled)
616 {
617 	struct perf_event_attr attr;
618 	int err;
619 
620 	ptrace_breakpoint_init(&attr);
621 	attr.bp_addr = addr;
622 
623 	err = ptrace_fill_bp_fields(&attr, len, type, disabled);
624 	if (err)
625 		return ERR_PTR(err);
626 
627 	return register_user_hw_breakpoint(&attr, ptrace_triggered,
628 						 NULL, tsk);
629 }
630 
631 static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
632 					int disabled)
633 {
634 	struct perf_event_attr attr = bp->attr;
635 	int err;
636 
637 	err = ptrace_fill_bp_fields(&attr, len, type, disabled);
638 	if (err)
639 		return err;
640 
641 	return modify_user_hw_breakpoint(bp, &attr);
642 }
643 
644 /*
645  * Handle ptrace writes to debug register 7.
646  */
647 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
648 {
649 	struct thread_struct *thread = &tsk->thread;
650 	unsigned long old_dr7;
651 	bool second_pass = false;
652 	int i, rc, ret = 0;
653 
654 	data &= ~DR_CONTROL_RESERVED;
655 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
656 
657 restore:
658 	rc = 0;
659 	for (i = 0; i < HBP_NUM; i++) {
660 		unsigned len, type;
661 		bool disabled = !decode_dr7(data, i, &len, &type);
662 		struct perf_event *bp = thread->ptrace_bps[i];
663 
664 		if (!bp) {
665 			if (disabled)
666 				continue;
667 
668 			bp = ptrace_register_breakpoint(tsk,
669 					len, type, 0, disabled);
670 			if (IS_ERR(bp)) {
671 				rc = PTR_ERR(bp);
672 				break;
673 			}
674 
675 			thread->ptrace_bps[i] = bp;
676 			continue;
677 		}
678 
679 		rc = ptrace_modify_breakpoint(bp, len, type, disabled);
680 		if (rc)
681 			break;
682 	}
683 
684 	/* Restore if the first pass failed, second_pass shouldn't fail. */
685 	if (rc && !WARN_ON(second_pass)) {
686 		ret = rc;
687 		data = old_dr7;
688 		second_pass = true;
689 		goto restore;
690 	}
691 
692 	return ret;
693 }
694 
695 /*
696  * Handle PTRACE_PEEKUSR calls for the debug register area.
697  */
698 static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
699 {
700 	struct thread_struct *thread = &tsk->thread;
701 	unsigned long val = 0;
702 
703 	if (n < HBP_NUM) {
704 		struct perf_event *bp = thread->ptrace_bps[n];
705 
706 		if (bp)
707 			val = bp->hw.info.address;
708 	} else if (n == 6) {
709 		val = thread->debugreg6;
710 	} else if (n == 7) {
711 		val = thread->ptrace_dr7;
712 	}
713 	return val;
714 }
715 
716 static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
717 				      unsigned long addr)
718 {
719 	struct thread_struct *t = &tsk->thread;
720 	struct perf_event *bp = t->ptrace_bps[nr];
721 	int err = 0;
722 
723 	if (!bp) {
724 		/*
725 		 * Put stub len and type to create an inactive but correct bp.
726 		 *
727 		 * CHECKME: the previous code returned -EIO if the addr wasn't
728 		 * a valid task virtual addr. The new one will return -EINVAL in
729 		 *  this case.
730 		 * -EINVAL may be what we want for in-kernel breakpoints users,
731 		 * but -EIO looks better for ptrace, since we refuse a register
732 		 * writing for the user. And anyway this is the previous
733 		 * behaviour.
734 		 */
735 		bp = ptrace_register_breakpoint(tsk,
736 				X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
737 				addr, true);
738 		if (IS_ERR(bp))
739 			err = PTR_ERR(bp);
740 		else
741 			t->ptrace_bps[nr] = bp;
742 	} else {
743 		struct perf_event_attr attr = bp->attr;
744 
745 		attr.bp_addr = addr;
746 		err = modify_user_hw_breakpoint(bp, &attr);
747 	}
748 
749 	return err;
750 }
751 
752 /*
753  * Handle PTRACE_POKEUSR calls for the debug register area.
754  */
755 static int ptrace_set_debugreg(struct task_struct *tsk, int n,
756 			       unsigned long val)
757 {
758 	struct thread_struct *thread = &tsk->thread;
759 	/* There are no DR4 or DR5 registers */
760 	int rc = -EIO;
761 
762 	if (n < HBP_NUM) {
763 		rc = ptrace_set_breakpoint_addr(tsk, n, val);
764 	} else if (n == 6) {
765 		thread->debugreg6 = val;
766 		rc = 0;
767 	} else if (n == 7) {
768 		rc = ptrace_write_dr7(tsk, val);
769 		if (!rc)
770 			thread->ptrace_dr7 = val;
771 	}
772 	return rc;
773 }
774 
775 /*
776  * These access the current or another (stopped) task's io permission
777  * bitmap for debugging or core dump.
778  */
779 static int ioperm_active(struct task_struct *target,
780 			 const struct user_regset *regset)
781 {
782 	return target->thread.io_bitmap_max / regset->size;
783 }
784 
785 static int ioperm_get(struct task_struct *target,
786 		      const struct user_regset *regset,
787 		      unsigned int pos, unsigned int count,
788 		      void *kbuf, void __user *ubuf)
789 {
790 	if (!target->thread.io_bitmap_ptr)
791 		return -ENXIO;
792 
793 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
794 				   target->thread.io_bitmap_ptr,
795 				   0, IO_BITMAP_BYTES);
796 }
797 
/*
 * Called by kernel/ptrace.c when detaching.
 *
 * Turns single-stepping off for @child so the single step bit is not left
 * set and, where TIF_SYSCALL_EMU exists, clears that thread flag as well.
 */
void ptrace_disable(struct task_struct *child)
{
	user_disable_single_step(child);

#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
}
810 
811 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
812 static const struct user_regset_view user_x86_32_view; /* Initialized below. */
813 #endif
814 
815 long arch_ptrace(struct task_struct *child, long request,
816 		 unsigned long addr, unsigned long data)
817 {
818 	int ret;
819 	unsigned long __user *datap = (unsigned long __user *)data;
820 
821 	switch (request) {
822 	/* read the word at location addr in the USER area. */
823 	case PTRACE_PEEKUSR: {
824 		unsigned long tmp;
825 
826 		ret = -EIO;
827 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
828 			break;
829 
830 		tmp = 0;  /* Default return condition */
831 		if (addr < sizeof(struct user_regs_struct))
832 			tmp = getreg(child, addr);
833 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
834 			 addr <= offsetof(struct user, u_debugreg[7])) {
835 			addr -= offsetof(struct user, u_debugreg[0]);
836 			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
837 		}
838 		ret = put_user(tmp, datap);
839 		break;
840 	}
841 
842 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
843 		ret = -EIO;
844 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
845 			break;
846 
847 		if (addr < sizeof(struct user_regs_struct))
848 			ret = putreg(child, addr, data);
849 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
850 			 addr <= offsetof(struct user, u_debugreg[7])) {
851 			addr -= offsetof(struct user, u_debugreg[0]);
852 			ret = ptrace_set_debugreg(child,
853 						  addr / sizeof(data), data);
854 		}
855 		break;
856 
857 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
858 		return copy_regset_to_user(child,
859 					   task_user_regset_view(current),
860 					   REGSET_GENERAL,
861 					   0, sizeof(struct user_regs_struct),
862 					   datap);
863 
864 	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
865 		return copy_regset_from_user(child,
866 					     task_user_regset_view(current),
867 					     REGSET_GENERAL,
868 					     0, sizeof(struct user_regs_struct),
869 					     datap);
870 
871 	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
872 		return copy_regset_to_user(child,
873 					   task_user_regset_view(current),
874 					   REGSET_FP,
875 					   0, sizeof(struct user_i387_struct),
876 					   datap);
877 
878 	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
879 		return copy_regset_from_user(child,
880 					     task_user_regset_view(current),
881 					     REGSET_FP,
882 					     0, sizeof(struct user_i387_struct),
883 					     datap);
884 
885 #ifdef CONFIG_X86_32
886 	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
887 		return copy_regset_to_user(child, &user_x86_32_view,
888 					   REGSET_XFP,
889 					   0, sizeof(struct user_fxsr_struct),
890 					   datap) ? -EIO : 0;
891 
892 	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
893 		return copy_regset_from_user(child, &user_x86_32_view,
894 					     REGSET_XFP,
895 					     0, sizeof(struct user_fxsr_struct),
896 					     datap) ? -EIO : 0;
897 #endif
898 
899 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
900 	case PTRACE_GET_THREAD_AREA:
901 		if ((int) addr < 0)
902 			return -EIO;
903 		ret = do_get_thread_area(child, addr,
904 					(struct user_desc __user *)data);
905 		break;
906 
907 	case PTRACE_SET_THREAD_AREA:
908 		if ((int) addr < 0)
909 			return -EIO;
910 		ret = do_set_thread_area(child, addr,
911 					(struct user_desc __user *)data, 0);
912 		break;
913 #endif
914 
915 #ifdef CONFIG_X86_64
916 		/* normal 64bit interface to access TLS data.
917 		   Works just like arch_prctl, except that the arguments
918 		   are reversed. */
919 	case PTRACE_ARCH_PRCTL:
920 		ret = do_arch_prctl(child, data, addr);
921 		break;
922 #endif
923 
924 	default:
925 		ret = ptrace_request(child, request, addr, data);
926 		break;
927 	}
928 
929 	return ret;
930 }
931 
932 #ifdef CONFIG_IA32_EMULATION
933 
934 #include <linux/compat.h>
935 #include <linux/syscalls.h>
936 #include <asm/ia32.h>
937 #include <asm/user32.h>
938 
939 #define R32(l,q)							\
940 	case offsetof(struct user32, regs.l):				\
941 		regs->q = value; break
942 
943 #define SEG32(rs)							\
944 	case offsetof(struct user32, regs.rs):				\
945 		return set_segment_reg(child,				\
946 				       offsetof(struct user_regs_struct, rs), \
947 				       value);				\
948 		break
949 
950 static int putreg32(struct task_struct *child, unsigned regno, u32 value)
951 {
952 	struct pt_regs *regs = task_pt_regs(child);
953 
954 	switch (regno) {
955 
956 	SEG32(cs);
957 	SEG32(ds);
958 	SEG32(es);
959 	SEG32(fs);
960 	SEG32(gs);
961 	SEG32(ss);
962 
963 	R32(ebx, bx);
964 	R32(ecx, cx);
965 	R32(edx, dx);
966 	R32(edi, di);
967 	R32(esi, si);
968 	R32(ebp, bp);
969 	R32(eax, ax);
970 	R32(eip, ip);
971 	R32(esp, sp);
972 
973 	case offsetof(struct user32, regs.orig_eax):
974 		/*
975 		 * A 32-bit debugger setting orig_eax means to restore
976 		 * the state of the task restarting a 32-bit syscall.
977 		 * Make sure we interpret the -ERESTART* codes correctly
978 		 * in case the task is not actually still sitting at the
979 		 * exit from a 32-bit syscall with TS_COMPAT still set.
980 		 */
981 		regs->orig_ax = value;
982 		if (syscall_get_nr(child, regs) >= 0)
983 			task_thread_info(child)->status |= TS_COMPAT;
984 		break;
985 
986 	case offsetof(struct user32, regs.eflags):
987 		return set_flags(child, value);
988 
989 	case offsetof(struct user32, u_debugreg[0]) ...
990 		offsetof(struct user32, u_debugreg[7]):
991 		regno -= offsetof(struct user32, u_debugreg[0]);
992 		return ptrace_set_debugreg(child, regno / 4, value);
993 
994 	default:
995 		if (regno > sizeof(struct user32) || (regno & 3))
996 			return -EIO;
997 
998 		/*
999 		 * Other dummy fields in the virtual user structure
1000 		 * are ignored
1001 		 */
1002 		break;
1003 	}
1004 	return 0;
1005 }
1006 
1007 #undef R32
1008 #undef SEG32
1009 
1010 #define R32(l,q)							\
1011 	case offsetof(struct user32, regs.l):				\
1012 		*val = regs->q; break
1013 
1014 #define SEG32(rs)							\
1015 	case offsetof(struct user32, regs.rs):				\
1016 		*val = get_segment_reg(child,				\
1017 				       offsetof(struct user_regs_struct, rs)); \
1018 		break
1019 
1020 static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
1021 {
1022 	struct pt_regs *regs = task_pt_regs(child);
1023 
1024 	switch (regno) {
1025 
1026 	SEG32(ds);
1027 	SEG32(es);
1028 	SEG32(fs);
1029 	SEG32(gs);
1030 
1031 	R32(cs, cs);
1032 	R32(ss, ss);
1033 	R32(ebx, bx);
1034 	R32(ecx, cx);
1035 	R32(edx, dx);
1036 	R32(edi, di);
1037 	R32(esi, si);
1038 	R32(ebp, bp);
1039 	R32(eax, ax);
1040 	R32(orig_eax, orig_ax);
1041 	R32(eip, ip);
1042 	R32(esp, sp);
1043 
1044 	case offsetof(struct user32, regs.eflags):
1045 		*val = get_flags(child);
1046 		break;
1047 
1048 	case offsetof(struct user32, u_debugreg[0]) ...
1049 		offsetof(struct user32, u_debugreg[7]):
1050 		regno -= offsetof(struct user32, u_debugreg[0]);
1051 		*val = ptrace_get_debugreg(child, regno / 4);
1052 		break;
1053 
1054 	default:
1055 		if (regno > sizeof(struct user32) || (regno & 3))
1056 			return -EIO;
1057 
1058 		/*
1059 		 * Other dummy fields in the virtual user structure
1060 		 * are ignored
1061 		 */
1062 		*val = 0;
1063 		break;
1064 	}
1065 	return 0;
1066 }
1067 
1068 #undef R32
1069 #undef SEG32
1070 
1071 static int genregs32_get(struct task_struct *target,
1072 			 const struct user_regset *regset,
1073 			 unsigned int pos, unsigned int count,
1074 			 void *kbuf, void __user *ubuf)
1075 {
1076 	if (kbuf) {
1077 		compat_ulong_t *k = kbuf;
1078 		while (count >= sizeof(*k)) {
1079 			getreg32(target, pos, k++);
1080 			count -= sizeof(*k);
1081 			pos += sizeof(*k);
1082 		}
1083 	} else {
1084 		compat_ulong_t __user *u = ubuf;
1085 		while (count >= sizeof(*u)) {
1086 			compat_ulong_t word;
1087 			getreg32(target, pos, &word);
1088 			if (__put_user(word, u++))
1089 				return -EFAULT;
1090 			count -= sizeof(*u);
1091 			pos += sizeof(*u);
1092 		}
1093 	}
1094 
1095 	return 0;
1096 }
1097 
1098 static int genregs32_set(struct task_struct *target,
1099 			 const struct user_regset *regset,
1100 			 unsigned int pos, unsigned int count,
1101 			 const void *kbuf, const void __user *ubuf)
1102 {
1103 	int ret = 0;
1104 	if (kbuf) {
1105 		const compat_ulong_t *k = kbuf;
1106 		while (count >= sizeof(*k) && !ret) {
1107 			ret = putreg32(target, pos, *k++);
1108 			count -= sizeof(*k);
1109 			pos += sizeof(*k);
1110 		}
1111 	} else {
1112 		const compat_ulong_t __user *u = ubuf;
1113 		while (count >= sizeof(*u) && !ret) {
1114 			compat_ulong_t word;
1115 			ret = __get_user(word, u++);
1116 			if (ret)
1117 				break;
1118 			ret = putreg32(target, pos, word);
1119 			count -= sizeof(*u);
1120 			pos += sizeof(*u);
1121 		}
1122 	}
1123 	return ret;
1124 }
1125 
1126 #ifdef CONFIG_X86_X32_ABI
1127 static long x32_arch_ptrace(struct task_struct *child,
1128 			    compat_long_t request, compat_ulong_t caddr,
1129 			    compat_ulong_t cdata)
1130 {
1131 	unsigned long addr = caddr;
1132 	unsigned long data = cdata;
1133 	void __user *datap = compat_ptr(data);
1134 	int ret;
1135 
1136 	switch (request) {
1137 	/* Read 32bits at location addr in the USER area.  Only allow
1138 	   to return the lower 32bits of segment and debug registers.  */
1139 	case PTRACE_PEEKUSR: {
1140 		u32 tmp;
1141 
1142 		ret = -EIO;
1143 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
1144 		    addr < offsetof(struct user_regs_struct, cs))
1145 			break;
1146 
1147 		tmp = 0;  /* Default return condition */
1148 		if (addr < sizeof(struct user_regs_struct))
1149 			tmp = getreg(child, addr);
1150 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
1151 			 addr <= offsetof(struct user, u_debugreg[7])) {
1152 			addr -= offsetof(struct user, u_debugreg[0]);
1153 			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
1154 		}
1155 		ret = put_user(tmp, (__u32 __user *)datap);
1156 		break;
1157 	}
1158 
1159 	/* Write the word at location addr in the USER area.  Only allow
1160 	   to update segment and debug registers with the upper 32bits
1161 	   zero-extended. */
1162 	case PTRACE_POKEUSR:
1163 		ret = -EIO;
1164 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
1165 		    addr < offsetof(struct user_regs_struct, cs))
1166 			break;
1167 
1168 		if (addr < sizeof(struct user_regs_struct))
1169 			ret = putreg(child, addr, data);
1170 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
1171 			 addr <= offsetof(struct user, u_debugreg[7])) {
1172 			addr -= offsetof(struct user, u_debugreg[0]);
1173 			ret = ptrace_set_debugreg(child,
1174 						  addr / sizeof(data), data);
1175 		}
1176 		break;
1177 
1178 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
1179 		return copy_regset_to_user(child,
1180 					   task_user_regset_view(current),
1181 					   REGSET_GENERAL,
1182 					   0, sizeof(struct user_regs_struct),
1183 					   datap);
1184 
1185 	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
1186 		return copy_regset_from_user(child,
1187 					     task_user_regset_view(current),
1188 					     REGSET_GENERAL,
1189 					     0, sizeof(struct user_regs_struct),
1190 					     datap);
1191 
1192 	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
1193 		return copy_regset_to_user(child,
1194 					   task_user_regset_view(current),
1195 					   REGSET_FP,
1196 					   0, sizeof(struct user_i387_struct),
1197 					   datap);
1198 
1199 	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
1200 		return copy_regset_from_user(child,
1201 					     task_user_regset_view(current),
1202 					     REGSET_FP,
1203 					     0, sizeof(struct user_i387_struct),
1204 					     datap);
1205 
1206 	default:
1207 		return compat_ptrace_request(child, request, addr, data);
1208 	}
1209 
1210 	return ret;
1211 }
1212 #endif
1213 
1214 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1215 			compat_ulong_t caddr, compat_ulong_t cdata)
1216 {
1217 	unsigned long addr = caddr;
1218 	unsigned long data = cdata;
1219 	void __user *datap = compat_ptr(data);
1220 	int ret;
1221 	__u32 val;
1222 
1223 #ifdef CONFIG_X86_X32_ABI
1224 	if (!is_ia32_task())
1225 		return x32_arch_ptrace(child, request, caddr, cdata);
1226 #endif
1227 
1228 	switch (request) {
1229 	case PTRACE_PEEKUSR:
1230 		ret = getreg32(child, addr, &val);
1231 		if (ret == 0)
1232 			ret = put_user(val, (__u32 __user *)datap);
1233 		break;
1234 
1235 	case PTRACE_POKEUSR:
1236 		ret = putreg32(child, addr, data);
1237 		break;
1238 
1239 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
1240 		return copy_regset_to_user(child, &user_x86_32_view,
1241 					   REGSET_GENERAL,
1242 					   0, sizeof(struct user_regs_struct32),
1243 					   datap);
1244 
1245 	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
1246 		return copy_regset_from_user(child, &user_x86_32_view,
1247 					     REGSET_GENERAL, 0,
1248 					     sizeof(struct user_regs_struct32),
1249 					     datap);
1250 
1251 	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
1252 		return copy_regset_to_user(child, &user_x86_32_view,
1253 					   REGSET_FP, 0,
1254 					   sizeof(struct user_i387_ia32_struct),
1255 					   datap);
1256 
1257 	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
1258 		return copy_regset_from_user(
1259 			child, &user_x86_32_view, REGSET_FP,
1260 			0, sizeof(struct user_i387_ia32_struct), datap);
1261 
1262 	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
1263 		return copy_regset_to_user(child, &user_x86_32_view,
1264 					   REGSET_XFP, 0,
1265 					   sizeof(struct user32_fxsr_struct),
1266 					   datap);
1267 
1268 	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
1269 		return copy_regset_from_user(child, &user_x86_32_view,
1270 					     REGSET_XFP, 0,
1271 					     sizeof(struct user32_fxsr_struct),
1272 					     datap);
1273 
1274 	case PTRACE_GET_THREAD_AREA:
1275 	case PTRACE_SET_THREAD_AREA:
1276 		return arch_ptrace(child, request, addr, data);
1277 
1278 	default:
1279 		return compat_ptrace_request(child, request, addr, data);
1280 	}
1281 
1282 	return ret;
1283 }
1284 
1285 #endif	/* CONFIG_IA32_EMULATION */
1286 
1287 #ifdef CONFIG_X86_64
1288 
1289 static struct user_regset x86_64_regsets[] __read_mostly = {
1290 	[REGSET_GENERAL] = {
1291 		.core_note_type = NT_PRSTATUS,
1292 		.n = sizeof(struct user_regs_struct) / sizeof(long),
1293 		.size = sizeof(long), .align = sizeof(long),
1294 		.get = genregs_get, .set = genregs_set
1295 	},
1296 	[REGSET_FP] = {
1297 		.core_note_type = NT_PRFPREG,
1298 		.n = sizeof(struct user_i387_struct) / sizeof(long),
1299 		.size = sizeof(long), .align = sizeof(long),
1300 		.active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
1301 	},
1302 	[REGSET_XSTATE] = {
1303 		.core_note_type = NT_X86_XSTATE,
1304 		.size = sizeof(u64), .align = sizeof(u64),
1305 		.active = xstateregs_active, .get = xstateregs_get,
1306 		.set = xstateregs_set
1307 	},
1308 	[REGSET_IOPERM64] = {
1309 		.core_note_type = NT_386_IOPERM,
1310 		.n = IO_BITMAP_LONGS,
1311 		.size = sizeof(long), .align = sizeof(long),
1312 		.active = ioperm_active, .get = ioperm_get
1313 	},
1314 };
1315 
1316 static const struct user_regset_view user_x86_64_view = {
1317 	.name = "x86_64", .e_machine = EM_X86_64,
1318 	.regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
1319 };
1320 
1321 #else  /* CONFIG_X86_32 */
1322 
/*
 * In this (non-CONFIG_X86_64) branch the "32" names used by the
 * x86_32 regset table are plain aliases for the native definitions.
 */
#define genregs32_get		genregs_get
#define genregs32_set		genregs_set
#define user_regs_struct32	user_regs_struct
1326 
1327 #endif	/* CONFIG_X86_64 */
1328 
1329 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1330 static struct user_regset x86_32_regsets[] __read_mostly = {
1331 	[REGSET_GENERAL] = {
1332 		.core_note_type = NT_PRSTATUS,
1333 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
1334 		.size = sizeof(u32), .align = sizeof(u32),
1335 		.get = genregs32_get, .set = genregs32_set
1336 	},
1337 	[REGSET_FP] = {
1338 		.core_note_type = NT_PRFPREG,
1339 		.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
1340 		.size = sizeof(u32), .align = sizeof(u32),
1341 		.active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set
1342 	},
1343 	[REGSET_XFP] = {
1344 		.core_note_type = NT_PRXFPREG,
1345 		.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
1346 		.size = sizeof(u32), .align = sizeof(u32),
1347 		.active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
1348 	},
1349 	[REGSET_XSTATE] = {
1350 		.core_note_type = NT_X86_XSTATE,
1351 		.size = sizeof(u64), .align = sizeof(u64),
1352 		.active = xstateregs_active, .get = xstateregs_get,
1353 		.set = xstateregs_set
1354 	},
1355 	[REGSET_TLS] = {
1356 		.core_note_type = NT_386_TLS,
1357 		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
1358 		.size = sizeof(struct user_desc),
1359 		.align = sizeof(struct user_desc),
1360 		.active = regset_tls_active,
1361 		.get = regset_tls_get, .set = regset_tls_set
1362 	},
1363 	[REGSET_IOPERM32] = {
1364 		.core_note_type = NT_386_IOPERM,
1365 		.n = IO_BITMAP_BYTES / sizeof(u32),
1366 		.size = sizeof(u32), .align = sizeof(u32),
1367 		.active = ioperm_active, .get = ioperm_get
1368 	},
1369 };
1370 
1371 static const struct user_regset_view user_x86_32_view = {
1372 	.name = "i386", .e_machine = EM_386,
1373 	.regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
1374 };
1375 #endif
1376 
1377 /*
1378  * This represents bytes 464..511 in the memory layout exported through
1379  * the REGSET_XSTATE interface.
1380  */
1381 u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
1382 
1383 void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
1384 {
1385 #ifdef CONFIG_X86_64
1386 	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1387 #endif
1388 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1389 	x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1390 #endif
1391 	xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
1392 }
1393 
1394 const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1395 {
1396 #ifdef CONFIG_IA32_EMULATION
1397 	if (test_tsk_thread_flag(task, TIF_IA32))
1398 #endif
1399 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1400 		return &user_x86_32_view;
1401 #endif
1402 #ifdef CONFIG_X86_64
1403 	return &user_x86_64_view;
1404 #endif
1405 }
1406 
1407 static void fill_sigtrap_info(struct task_struct *tsk,
1408 				struct pt_regs *regs,
1409 				int error_code, int si_code,
1410 				struct siginfo *info)
1411 {
1412 	tsk->thread.trap_nr = X86_TRAP_DB;
1413 	tsk->thread.error_code = error_code;
1414 
1415 	memset(info, 0, sizeof(*info));
1416 	info->si_signo = SIGTRAP;
1417 	info->si_code = si_code;
1418 	info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
1419 }
1420 
1421 void user_single_step_siginfo(struct task_struct *tsk,
1422 				struct pt_regs *regs,
1423 				struct siginfo *info)
1424 {
1425 	fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
1426 }
1427 
1428 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
1429 					 int error_code, int si_code)
1430 {
1431 	struct siginfo info;
1432 
1433 	fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
1434 	/* Send us the fake SIGTRAP */
1435 	force_sig_info(SIGTRAP, &info, tsk);
1436 }
1437 
1438 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
1439 {
1440 #ifdef CONFIG_X86_64
1441 	if (arch == AUDIT_ARCH_X86_64) {
1442 		audit_syscall_entry(regs->orig_ax, regs->di,
1443 				    regs->si, regs->dx, regs->r10);
1444 	} else
1445 #endif
1446 	{
1447 		audit_syscall_entry(regs->orig_ax, regs->bx,
1448 				    regs->cx, regs->dx, regs->si);
1449 	}
1450 }
1451 
1452 /*
1453  * We can return 0 to resume the syscall or anything else to go to phase
1454  * 2.  If we resume the syscall, we need to put something appropriate in
1455  * regs->orig_ax.
1456  *
1457  * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
1458  * are fully functional.
1459  *
1460  * For phase 2's benefit, our return value is:
1461  * 0:			resume the syscall
1462  * 1:			go to phase 2; no seccomp phase 2 needed
1463  * anything else:	go to phase 2; pass return value to seccomp
1464  */
1465 unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
1466 {
1467 	unsigned long ret = 0;
1468 	u32 work;
1469 
1470 	BUG_ON(regs != task_pt_regs(current));
1471 
1472 	work = ACCESS_ONCE(current_thread_info()->flags) &
1473 		_TIF_WORK_SYSCALL_ENTRY;
1474 
1475 	/*
1476 	 * If TIF_NOHZ is set, we are required to call user_exit() before
1477 	 * doing anything that could touch RCU.
1478 	 */
1479 	if (work & _TIF_NOHZ) {
1480 		user_exit();
1481 		work &= ~_TIF_NOHZ;
1482 	}
1483 
1484 #ifdef CONFIG_SECCOMP
1485 	/*
1486 	 * Do seccomp first -- it should minimize exposure of other
1487 	 * code, and keeping seccomp fast is probably more valuable
1488 	 * than the rest of this.
1489 	 */
1490 	if (work & _TIF_SECCOMP) {
1491 		struct seccomp_data sd;
1492 
1493 		sd.arch = arch;
1494 		sd.nr = regs->orig_ax;
1495 		sd.instruction_pointer = regs->ip;
1496 #ifdef CONFIG_X86_64
1497 		if (arch == AUDIT_ARCH_X86_64) {
1498 			sd.args[0] = regs->di;
1499 			sd.args[1] = regs->si;
1500 			sd.args[2] = regs->dx;
1501 			sd.args[3] = regs->r10;
1502 			sd.args[4] = regs->r8;
1503 			sd.args[5] = regs->r9;
1504 		} else
1505 #endif
1506 		{
1507 			sd.args[0] = regs->bx;
1508 			sd.args[1] = regs->cx;
1509 			sd.args[2] = regs->dx;
1510 			sd.args[3] = regs->si;
1511 			sd.args[4] = regs->di;
1512 			sd.args[5] = regs->bp;
1513 		}
1514 
1515 		BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
1516 		BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
1517 
1518 		ret = seccomp_phase1(&sd);
1519 		if (ret == SECCOMP_PHASE1_SKIP) {
1520 			regs->orig_ax = -1;
1521 			ret = 0;
1522 		} else if (ret != SECCOMP_PHASE1_OK) {
1523 			return ret;  /* Go directly to phase 2 */
1524 		}
1525 
1526 		work &= ~_TIF_SECCOMP;
1527 	}
1528 #endif
1529 
1530 	/* Do our best to finish without phase 2. */
1531 	if (work == 0)
1532 		return ret;  /* seccomp and/or nohz only (ret == 0 here) */
1533 
1534 #ifdef CONFIG_AUDITSYSCALL
1535 	if (work == _TIF_SYSCALL_AUDIT) {
1536 		/*
1537 		 * If there is no more work to be done except auditing,
1538 		 * then audit in phase 1.  Phase 2 always audits, so, if
1539 		 * we audit here, then we can't go on to phase 2.
1540 		 */
1541 		do_audit_syscall_entry(regs, arch);
1542 		return 0;
1543 	}
1544 #endif
1545 
1546 	return 1;  /* Something is enabled that we can't handle in phase 1 */
1547 }
1548 
1549 /* Returns the syscall nr to run (which should match regs->orig_ax). */
1550 long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
1551 				unsigned long phase1_result)
1552 {
1553 	long ret = 0;
1554 	u32 work = ACCESS_ONCE(current_thread_info()->flags) &
1555 		_TIF_WORK_SYSCALL_ENTRY;
1556 
1557 	BUG_ON(regs != task_pt_regs(current));
1558 
1559 	/*
1560 	 * If we stepped into a sysenter/syscall insn, it trapped in
1561 	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
1562 	 * If user-mode had set TF itself, then it's still clear from
1563 	 * do_debug() and we need to set it again to restore the user
1564 	 * state.  If we entered on the slow path, TF was already set.
1565 	 */
1566 	if (work & _TIF_SINGLESTEP)
1567 		regs->flags |= X86_EFLAGS_TF;
1568 
1569 #ifdef CONFIG_SECCOMP
1570 	/*
1571 	 * Call seccomp_phase2 before running the other hooks so that
1572 	 * they can see any changes made by a seccomp tracer.
1573 	 */
1574 	if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
1575 		/* seccomp failures shouldn't expose any additional code. */
1576 		return -1;
1577 	}
1578 #endif
1579 
1580 	if (unlikely(work & _TIF_SYSCALL_EMU))
1581 		ret = -1L;
1582 
1583 	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
1584 	    tracehook_report_syscall_entry(regs))
1585 		ret = -1L;
1586 
1587 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1588 		trace_sys_enter(regs, regs->orig_ax);
1589 
1590 	do_audit_syscall_entry(regs, arch);
1591 
1592 	return ret ?: regs->orig_ax;
1593 }
1594 
1595 long syscall_trace_enter(struct pt_regs *regs)
1596 {
1597 	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
1598 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
1599 
1600 	if (phase1_result == 0)
1601 		return regs->orig_ax;
1602 	else
1603 		return syscall_trace_enter_phase2(regs, arch, phase1_result);
1604 }
1605 
1606 void syscall_trace_leave(struct pt_regs *regs)
1607 {
1608 	bool step;
1609 
1610 	/*
1611 	 * We may come here right after calling schedule_user()
1612 	 * or do_notify_resume(), in which case we can be in RCU
1613 	 * user mode.
1614 	 */
1615 	user_exit();
1616 
1617 	audit_syscall_exit(regs);
1618 
1619 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1620 		trace_sys_exit(regs, regs->ax);
1621 
1622 	/*
1623 	 * If TIF_SYSCALL_EMU is set, we only get here because of
1624 	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
1625 	 * We already reported this syscall instruction in
1626 	 * syscall_trace_enter().
1627 	 */
1628 	step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
1629 			!test_thread_flag(TIF_SYSCALL_EMU);
1630 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
1631 		tracehook_report_syscall_exit(regs, step);
1632 
1633 	user_enter();
1634 }
1635