/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>

#include <v9/sys/machpcb.h>
#include <v9/sys/privregs.h>
/*
 * Lossless User-Land Tracing on SPARC
 * -----------------------------------
 *
 * The Basic Idea
 *
 * The most important design constraint is, of course, correct execution of
 * the user thread above all else. The next most important goal is rapid
 * execution. We combine execution of instructions in user-land with
 * emulation of certain instructions in the kernel to aim for complete
 * correctness and maximal performance.
 *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single-step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
 * branches, call and link instructions (call and jmpl), and the rdpc
 * instruction. These instructions cannot be executed in the manner described
 * so they must be emulated in the kernel.
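 *
 * For example (a sketch for illustration only; A and S are hypothetical
 * addresses): if the traced instruction at address A is
 *	add	%o1, %o2, %o0
 * and the scratch space in the ulwp_t lives at S, then on the trap we
 * copy the add out to S and arrange
 *	%pc = S, %npc = A + 4
 * so the add executes in user-land at S and control falls straight
 * through to A + 4 with no second kernel entry.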
 *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating the branch conditions.
 *
 *
 * A Cache Heavy Portfolio
 *
 * It's important to note at this time that copying an instruction out to the
 * ulwp_t scratch space in user-land is rather complicated. SPARC has
 * separate data and instruction caches so any writes to the D$ (using a
 * store instruction for example) aren't necessarily reflected in the I$.
 * The flush instruction can be used to synchronize the two and must be used
 * for any self-modifying code, but the flush instruction only applies to the
 * primary address space (the absence of a flusha analogue to the flush
 * instruction that accepts an ASI argument is an obvious omission from SPARC
 * v9 where the notion of the alternate address space was introduced on
 * SPARC). To correctly copy out the instruction we must use a block store
 * that doesn't allocate in the D$ and ensures synchronization with the I$;
 * see dtrace_blksuword32() for the implementation (this function uses
 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
 * described). Refer to the UltraSPARC I/II manual for details on the
 * ASI_BLK_COMMIT_S ASI.
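 *
 * For ordinary self-modifying code in the primary address space, the
 * conventional sequence would be, roughly (a sketch):
 *	st	%o1, [%o0]	! write the new instruction
 *	flush	%o0		! make the I$ coherent with the D$
 * but since no flusha exists to pair with a store through an alternate
 * ASI, we use the block-commit store approach described above instead.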
 *
 *
 * Return Subtleties
 *
 * When we're firing a return probe we need to expose the value returned by
 * the function being traced. Since the function can set the return value
 * in its last instruction, we need to fire the return probe only _after_
 * the effects of the instruction are apparent. For instructions that we
 * emulate, we can call dtrace_probe() after we've performed the emulation;
 * for instructions that we execute after we return to user-land, we set
 * %pc to the instruction we copied out (as described above) and set %npc
 * to a trap instruction stashed in the ulwp_t structure. After the traced
 * instruction is executed, the trap instruction returns control to the
 * kernel where we can fire the return probe.
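 *
 * Schematically (a sketch; see FASTTRAP_T_COMMON in fasttrap_pid_probe()):
 *	%pc  = %g7				! the copied-out instruction
 *	%npc = %g7 + FASTTRAP_OFF_FTRET		! a trap back into the kernel
 * so the traced instruction executes in user-land and the trap that
 * immediately follows it fires the return probe.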
 *
 * This need for a second trap in cases where we execute the traced
 * instruction makes it all the more important to emulate the most common
 * instructions to avoid the second trip in and out of the kernel.
 *
 *
 * Making it Fast
 *
 * Since copying out an instruction is neither simple nor inexpensive for the
 * CPU, we should attempt to avoid doing it in as many cases as possible.
 * Since function entry and return are usually the most interesting probe
 * sites, we attempt to tune the performance of the fasttrap provider around
 * instructions typically in those places.
 *
 * Looking at a bunch of functions in libraries and executables reveals that
 * most functions begin with either a save or a sethi (to set up a larger
 * argument to the save) and end with a restore or an or (in the case of leaf
 * functions). To try to improve performance, we emulate all of these
 * instructions in the kernel.
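 *
 * A typical pair of optimized sites looks like this (a sketch; the
 * particular immediates and registers are illustrative only):
 *	func:	save	%sp, -176, %sp		! entry: FASTTRAP_T_SAVE
 *		...
 *		ret
 *		restore	%g0, %o1, %o0		! return: FASTTRAP_T_RESTORE
 * and, for a leaf function:
 *	leaf:	...
 *		retl
 *		or	%g0, %o1, %o0		! return: FASTTRAP_T_OR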
 *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why in the case of emulating a restore we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
 */

#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
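
/*
 * As a worked example of the decode macros above: for BREAKPOINT_INSTR
 * (0x91d02001, "ta 1"), OP() yields 2, OP3() yields 0x3a (OP3_TCC),
 * COND() yields 8 (the "always" condition), and SW_TRAP() yields 1.
 */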

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;

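/*
 * Fetch argument argno for the probe site described by rp. The first six
 * arguments live in %o0 through %o5; subsequent arguments are read from
 * the argument dump area of the caller's stack frame (with the stack
 * pointer adjusted by STACK_BIAS for an LP64 process).
 */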
static uint64_t
fasttrap_anarg(struct regs *rp, int argno)
{
	uint64_t value;

	if (argno < 6)
		return ((&rp->r_o0)[argno]);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	}

	return (value);
}

static ulong_t fasttrap_getreg(struct regs *, uint_t);
static void fasttrap_putreg(struct regs *, uint_t, ulong_t);

static void
fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
    uint_t fake_restore, int argc, uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	int inc = (fake_restore ? 16 : 0);

	/*
	 * The only way we'll hit the fake_restore case is if a USDT probe is
	 * invoked as a tail-call. While calling fasttrap_getreg() wouldn't
	 * be incorrect, we can avoid it and safely use rp->r_sp directly
	 * since a tail-call can't be made if the invoked function would use
	 * the argument dump space (i.e. if there were more than 6
	 * arguments). We take this shortcut because unconditionally rooting
	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
	 */

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		uintptr_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}

	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;
		uint32_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}

static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap != NULL && fake_restore) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else if (probe->ftp_argmap != NULL) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);

			} else if (fake_restore) {
				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, arg0, arg1,
				    arg2, arg3, arg4);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			}

			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a
		 * tail-call to the function located immediately subsequent.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of the return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}

int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Look up the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in
	 * user-land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

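		/*
		 * The low 13 bits of the instruction hold the signed
		 * immediate (simm13); shifting left by 19 moves its sign
		 * bit into bit 31, and the arithmetic shift back down
		 * sign-extends it.
		 */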
		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}

	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently. Most non-leaf functions
		 * end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's (e.g. rd 8, %o0, becomes rd 24, %i0);
		 * leave %g's as they are.
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return %o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
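		/*
		 * sethi places its 22-bit immediate in bits 31:10 of rd
		 * and zeroes the low 10 bits, which is exactly what
		 * shifting the instruction left by 10 produces. A sethi
		 * to %g0 is a no-op, so we skip the register write.
		 */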
		if (RD(tp->ftt_instr) != R_G0) {
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0; break;
		case 0x1:	/* BE */
			taken = z; break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v); break;
		case 0x3:	/* BL */
			taken = n ^ v; break;
		case 0x4:	/* BLEU */
			taken = c | z; break;
		case 0x5:	/* BCS (BLU) */
			taken = c; break;
		case 0x6:	/* BNEG */
			taken = n; break;
		case 0x7:	/* BVS */
			taken = v; break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1; break;
		case 0x9:	/* BNE */
			taken = ~z; break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v)); break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v); break;
		case 0xc:	/* BGU */
			taken = ~(c | z); break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c; break;
		case 0xe:	/* BPOS */
			taken = ~n; break;
		case 0xf:	/* BVC */
			taken = ~v; break;
		}

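		/*
		 * The negated cases above leave the upper bits of taken
		 * set, so only bit 0 is meaningful here.
		 */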
		if (taken & 1) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

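		/*
		 * Each mask below has bit (1 << fcc) set for the fcc
		 * values (0 = E, 1 = L, 2 = G, 3 = U) on which the branch
		 * is taken, written as an or of the four possible bits
		 * for legibility.
		 */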
		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0); break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0); break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0); break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0); break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0); break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0); break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0); break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0); break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1); break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1); break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1); break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1); break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1); break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1); break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1); break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1); break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since it would violate the ABI. It's a
		 * violation of the ABI because we can't ensure deterministic
		 * behavior. We should have identified this case when we
		 * enabled the probe.
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0); break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0); break;
		case 0x3:	/* BRLZ */
			taken = (value < 0); break;
		case 0x5:	/* BRNZ */
			taken = (value != 0); break;
		case 0x6:	/* BRGZ */
			taken = (value > 0); break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0); break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;

	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exists for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}

int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = ttoproc(curthread);
	pid_t pid;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set the %pc and %npc to their values when the traced
	 * instruction was initially executed so that it appears to
	 * dtrace_probe() that we're on the original instruction, and so that
	 * the user can't easily detect our complex web of lies.
	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
	 * after we return.
	 */
	rp->r_pc = pc;
	rp->r_npc = npc;

	pid = p->p_pid;
	fasttrap_return_common(rp, pc, pid, 0);

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfers present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and overwrite that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
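		/*
		 * The call displacement is a 30-bit word offset; shifting
		 * it left by 2 converts it to a byte offset and moves its
		 * sign bit into bit 31 so that disp ends up correctly
		 * signed.
		 */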
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

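			/*
			 * Sign-extend the 19-bit word displacement and
			 * convert it to a byte displacement: shifting left
			 * by 13 and arithmetically back by 11 nets a shift
			 * left by 2 with the sign bit propagated. The
			 * DISP22 and DISP16 cases below do the same dance
			 * with different widths.
			 */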
			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;

		case OP2_SETHI:
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to look up which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern ulong_t dtrace_getreg_win(uint_t, uint_t);

	/*
	 * We have the %os and %gs in our struct regs, but if we need to
	 * snag a %l or %i we need to go scrounging around in the process's
	 * address space.
	 */
	if (reg == 0)
		return (0);

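	/*
	 * Registers 1 through 15 (%g1-%g7 and %o0-%o7) are stored
	 * contiguously in the struct regs, so we can index off of r_g1.
	 */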
	if (reg < 16)
		return ((&rp->r_g1)[reg - 1]);

	/*
	 * Before we look at the user's stack, we'll check the register
	 * windows to see if the information we want is in there.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		value = dtrace_getreg_win(reg, 1);
		dtrace_interrupt_enable(cookie);

		atomic_inc_64(&fasttrap_getreg_fast_cnt);

		return (value);
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First check the machpcb structure to see if we've already read
	 * in the register window we're looking for; if we haven't (and
	 * we probably haven't), try to copy in the value of the register.
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
			goto err;
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		uint32_t *v32 = (uint32_t *)&value;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
			goto err;

		v32[0] = 0;
	}

	atomic_inc_64(&fasttrap_getreg_slow_cnt);
	return (value);

err:
	/*
	 * If the copy in failed, the process will be in an irrecoverable
	 * state, and we have no choice but to kill it.
	 */
	psignal(ttoproc(curthread), SIGILL);
	return (0);
}

static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;

static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	if (reg == 0)
		return;

	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_inc_64(&fasttrap_putreg_fast_cnt);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't, try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		/* LINTED - alignment */
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = value;
				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
			return;
		}
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		/* LINTED - alignment */
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		uint32_t v32 = (uint32_t)value;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
			return;
		}
	}

	atomic_inc_64(&fasttrap_putreg_slow_cnt);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}