xref: /titanic_41/usr/src/uts/intel/fs/proc/prmachdep.c (revision 1557e65f9d0c6fde875d807c12fc03ea20f50280)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/cred.h>
36 #include <sys/debug.h>
37 #include <sys/inline.h>
38 #include <sys/kmem.h>
39 #include <sys/proc.h>
40 #include <sys/regset.h>
41 #include <sys/privregs.h>
42 #include <sys/sysmacros.h>
43 #include <sys/systm.h>
44 #include <sys/vfs.h>
45 #include <sys/vnode.h>
46 #include <sys/psw.h>
47 #include <sys/pcb.h>
48 #include <sys/buf.h>
49 #include <sys/signal.h>
50 #include <sys/user.h>
51 #include <sys/cpuvar.h>
52 
53 #include <sys/fault.h>
54 #include <sys/syscall.h>
55 #include <sys/procfs.h>
56 #include <sys/cmn_err.h>
57 #include <sys/stack.h>
58 #include <sys/debugreg.h>
59 #include <sys/copyops.h>
60 
61 #include <sys/vmem.h>
62 #include <sys/mman.h>
63 #include <sys/vmparam.h>
64 #include <sys/fp.h>
65 #include <sys/archsystm.h>
66 #include <sys/vmsystm.h>
67 #include <vm/hat.h>
68 #include <vm/as.h>
69 #include <vm/seg.h>
70 #include <vm/seg_kmem.h>
71 #include <vm/seg_kp.h>
72 #include <vm/page.h>
73 
74 #include <sys/sysi86.h>
75 
76 #include <fs/proc/prdata.h>
77 
/*
 * Maximum number of watched areas.  The initial value is 10000.
 */
int	prnwatch = 10000;
79 
80 /*
81  * Force a thread into the kernel if it is not already there.
82  * This is a no-op on uniprocessors.
83  */
84 /* ARGSUSED */
85 void
86 prpokethread(kthread_t *t)
87 {
88 	if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
89 		poke_cpu(t->t_cpu->cpu_id);
90 }
91 
92 /*
93  * Return general registers.
94  */
95 void
96 prgetprregs(klwp_t *lwp, prgregset_t prp)
97 {
98 	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
99 
100 	getgregs(lwp, prp);
101 }
102 
103 /*
104  * Set general registers.
105  * (Note: This can be an alias to setgregs().)
106  */
107 void
108 prsetprregs(klwp_t *lwp, prgregset_t prp, int initial)
109 {
110 	if (initial)		/* set initial values */
111 		lwptoregs(lwp)->r_ps = PSL_USER;
112 	(void) setgregs(lwp, prp);
113 }
114 
115 #ifdef _SYSCALL32_IMPL
116 
117 /*
118  * Convert prgregset32 to native prgregset
119  */
120 void
121 prgregset_32ton(klwp_t *lwp, prgregset32_t src, prgregset_t dst)
122 {
123 	struct regs *rp = lwptoregs(lwp);
124 
125 	dst[REG_GSBASE] = lwp->lwp_pcb.pcb_gsbase;
126 	dst[REG_FSBASE] = lwp->lwp_pcb.pcb_fsbase;
127 
128 	dst[REG_DS] = (uint16_t)src[DS];
129 	dst[REG_ES] = (uint16_t)src[ES];
130 
131 	dst[REG_GS] = (uint16_t)src[GS];
132 	dst[REG_FS] = (uint16_t)src[FS];
133 	dst[REG_SS] = (uint16_t)src[SS];
134 	dst[REG_RSP] = (uint32_t)src[UESP];
135 	dst[REG_RFL] =
136 		(rp->r_ps & ~PSL_USERMASK) | (src[EFL] & PSL_USERMASK);
137 	dst[REG_CS] = (uint16_t)src[CS];
138 	dst[REG_RIP] = (uint32_t)src[EIP];
139 	dst[REG_ERR] = (uint32_t)src[ERR];
140 	dst[REG_TRAPNO] = (uint32_t)src[TRAPNO];
141 	dst[REG_RAX] = (uint32_t)src[EAX];
142 	dst[REG_RCX] = (uint32_t)src[ECX];
143 	dst[REG_RDX] = (uint32_t)src[EDX];
144 	dst[REG_RBX] = (uint32_t)src[EBX];
145 	dst[REG_RBP] = (uint32_t)src[EBP];
146 	dst[REG_RSI] = (uint32_t)src[ESI];
147 	dst[REG_RDI] = (uint32_t)src[EDI];
148 	dst[REG_R8] = dst[REG_R9] = dst[REG_R10] = dst[REG_R11] =
149 	    dst[REG_R12] = dst[REG_R13] = dst[REG_R14] = dst[REG_R15] = 0;
150 }
151 
152 /*
153  * Return 32-bit general registers
154  */
155 void
156 prgetprregs32(klwp_t *lwp, prgregset32_t prp)
157 {
158 	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
159 	getgregs32(lwp, prp);
160 }
161 
162 #endif	/* _SYSCALL32_IMPL */
163 
164 /*
165  * Get the syscall return values for the lwp.
166  */
167 int
168 prgetrvals(klwp_t *lwp, long *rval1, long *rval2)
169 {
170 	struct regs *r = lwptoregs(lwp);
171 
172 	if (r->r_ps & PS_C)
173 		return (r->r_r0);
174 	if (lwp->lwp_eosys == JUSTRETURN) {
175 		*rval1 = 0;
176 		*rval2 = 0;
177 	} else if (lwp_getdatamodel(lwp) != DATAMODEL_NATIVE) {
178 		/*
179 		 * XX64	Not sure we -really- need to do this, because the
180 		 *	syscall return already masks off the bottom values ..?
181 		 */
182 		*rval1 = r->r_r0 & (uint32_t)0xffffffffu;
183 		*rval2 = r->r_r1 & (uint32_t)0xffffffffu;
184 	} else {
185 		*rval1 = r->r_r0;
186 		*rval2 = r->r_r1;
187 	}
188 	return (0);
189 }
190 
191 /*
192  * Does the system support floating-point, either through hardware
193  * or by trapping and emulating floating-point machine instructions?
194  */
195 int
196 prhasfp(void)
197 {
198 	extern int fp_kind;
199 
200 	return (fp_kind != FP_NO);
201 }
202 
203 /*
204  * Get floating-point registers.
205  */
206 void
207 prgetprfpregs(klwp_t *lwp, prfpregset_t *pfp)
208 {
209 	bzero(pfp, sizeof (prfpregset_t));
210 	getfpregs(lwp, pfp);
211 }
212 
#if defined(_SYSCALL32_IMPL)
/*
 * Fetch the 32-bit floating-point registers of 'lwp' into '*pfp'.
 *
 * The destination is cleared in full before getfpregs32() fills it in.
 */
void
prgetprfpregs32(klwp_t *lwp, prfpregset32_t *pfp)
{
	bzero(pfp, sizeof (prfpregset32_t));

	getfpregs32(lwp, pfp);
}
#endif	/* _SYSCALL32_IMPL */
221 
222 /*
223  * Set floating-point registers.
224  * (Note: This can be an alias to setfpregs().)
225  */
226 void
227 prsetprfpregs(klwp_t *lwp, prfpregset_t *pfp)
228 {
229 	setfpregs(lwp, pfp);
230 }
231 
#if defined(_SYSCALL32_IMPL)
/*
 * Install the 32-bit floating-point registers in '*pfp' into 'lwp'.
 *
 * Nothing is done here beyond calling setfpregs32().
 */
void
prsetprfpregs32(klwp_t *lwp, prfpregset32_t *pfp)
{
	setfpregs32(lwp, pfp);
}
#endif	/* _SYSCALL32_IMPL */
239 
240 /*
241  * Does the system support extra register state?
242  */
243 /* ARGSUSED */
244 int
245 prhasx(proc_t *p)
246 {
247 	return (0);
248 }
249 
250 /*
251  * Get the size of the extra registers.
252  */
253 /* ARGSUSED */
254 int
255 prgetprxregsize(proc_t *p)
256 {
257 	return (0);
258 }
259 
260 /*
261  * Get extra registers.
262  */
263 /*ARGSUSED*/
264 void
265 prgetprxregs(klwp_t *lwp, caddr_t prx)
266 {
267 	/* no extra registers */
268 }
269 
270 /*
271  * Set extra registers.
272  */
273 /*ARGSUSED*/
274 void
275 prsetprxregs(klwp_t *lwp, caddr_t prx)
276 {
277 	/* no extra registers */
278 }
279 
280 /*
281  * Return the base (lower limit) of the process stack.
282  */
283 caddr_t
284 prgetstackbase(proc_t *p)
285 {
286 	return (p->p_usrstack - p->p_stksize);
287 }
288 
289 /*
290  * Return the "addr" field for pr_addr in prpsinfo_t.
291  * This is a vestige of the past, so whatever we return is OK.
292  */
293 caddr_t
294 prgetpsaddr(proc_t *p)
295 {
296 	return ((caddr_t)p);
297 }
298 
299 /*
300  * Arrange to single-step the lwp.
301  */
302 void
303 prstep(klwp_t *lwp, int watchstep)
304 {
305 	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
306 
307 	/*
308 	 * flag LWP so that its r_efl trace bit (PS_T) will be set on
309 	 * next return to usermode.
310 	 */
311 	lwp->lwp_pcb.pcb_flags |= REQUEST_STEP;
312 	lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP;
313 
314 	if (watchstep)
315 		lwp->lwp_pcb.pcb_flags |= WATCH_STEP;
316 	else
317 		lwp->lwp_pcb.pcb_flags |= NORMAL_STEP;
318 
319 	aston(lwptot(lwp));	/* let trap() set PS_T in rp->r_efl */
320 }
321 
322 /*
323  * Undo prstep().
324  */
325 void
326 prnostep(klwp_t *lwp)
327 {
328 	ASSERT(ttolwp(curthread) == lwp ||
329 	    MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
330 
331 	/*
332 	 * flag LWP so that its r_efl trace bit (PS_T) will be cleared on
333 	 * next return to usermode.
334 	 */
335 	lwp->lwp_pcb.pcb_flags |= REQUEST_NOSTEP;
336 
337 	lwp->lwp_pcb.pcb_flags &=
338 	    ~(REQUEST_STEP|NORMAL_STEP|WATCH_STEP|DEBUG_PENDING);
339 
340 	aston(lwptot(lwp));	/* let trap() clear PS_T in rp->r_efl */
341 }
342 
343 /*
344  * Return non-zero if a single-step is in effect.
345  */
346 int
347 prisstep(klwp_t *lwp)
348 {
349 	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
350 
351 	return ((lwp->lwp_pcb.pcb_flags &
352 		(NORMAL_STEP|WATCH_STEP|DEBUG_PENDING)) != 0);
353 }
354 
355 /*
356  * Set the PC to the specified virtual address.
357  */
358 void
359 prsvaddr(klwp_t *lwp, caddr_t vaddr)
360 {
361 	struct regs *r = lwptoregs(lwp);
362 
363 	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
364 
365 	r->r_pc = (uintptr_t)vaddr;
366 }
367 
368 /*
369  * Map address "addr" in address space "as" into a kernel virtual address.
370  * The memory is guaranteed to be resident and locked down.
371  */
372 caddr_t
373 prmapin(struct as *as, caddr_t addr, int writing)
374 {
375 	page_t *pp;
376 	caddr_t kaddr;
377 	pfn_t pfnum;
378 
379 	/*
380 	 * XXX - Because of past mistakes, we have bits being returned
381 	 * by getpfnum that are actually the page type bits of the pte.
382 	 * When the object we are trying to map is a memory page with
383 	 * a page structure everything is ok and we can use the optimal
384 	 * method, ppmapin.  Otherwise, we have to do something special.
385 	 */
386 	pfnum = hat_getpfnum(as->a_hat, addr);
387 	if (pf_is_memory(pfnum)) {
388 		pp = page_numtopp_nolock(pfnum);
389 		if (pp != NULL) {
390 			ASSERT(PAGE_LOCKED(pp));
391 			kaddr = ppmapin(pp, writing ?
392 			    (PROT_READ | PROT_WRITE) : PROT_READ, (caddr_t)-1);
393 			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
394 		}
395 	}
396 
397 	/*
398 	 * Oh well, we didn't have a page struct for the object we were
399 	 * trying to map in; ppmapin doesn't handle devices, but allocating a
400 	 * heap address allows ppmapout to free virtual space when done.
401 	 */
402 	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
403 
404 	hat_devload(kas.a_hat, kaddr, MMU_PAGESIZE,  pfnum,
405 	    writing ? (PROT_READ | PROT_WRITE) : PROT_READ, 0);
406 
407 	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
408 }
409 
410 /*
411  * Unmap address "addr" in address space "as"; inverse of prmapin().
412  */
413 /* ARGSUSED */
414 void
415 prmapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
416 {
417 	extern void ppmapout(caddr_t);
418 
419 	vaddr = (caddr_t)((uintptr_t)vaddr & PAGEMASK);
420 	ppmapout(vaddr);
421 }
422 
423 /*
424  * Make sure the lwp is in an orderly state
425  * for inspection by a debugger through /proc.
426  * Called from stop() and from syslwp_create().
427  */
428 /* ARGSUSED */
429 void
430 prstop(int why, int what)
431 {
432 	klwp_t *lwp = ttolwp(curthread);
433 	struct regs *r = lwptoregs(lwp);
434 
435 	/*
436 	 * Make sure we don't deadlock on a recursive call
437 	 * to prstop().  stop() tests the lwp_nostop flag.
438 	 */
439 	ASSERT(lwp->lwp_nostop == 0);
440 	lwp->lwp_nostop = 1;
441 
442 	if (copyin_nowatch((caddr_t)r->r_pc, &lwp->lwp_pcb.pcb_instr,
443 		    sizeof (lwp->lwp_pcb.pcb_instr)) == 0)
444 		lwp->lwp_pcb.pcb_flags |= INSTR_VALID;
445 	else {
446 		lwp->lwp_pcb.pcb_flags &= ~INSTR_VALID;
447 		lwp->lwp_pcb.pcb_instr = 0;
448 	}
449 
450 	(void) save_syscall_args();
451 	ASSERT(lwp->lwp_nostop == 1);
452 	lwp->lwp_nostop = 0;
453 }
454 
455 /*
456  * Fetch the user-level instruction on which the lwp is stopped.
457  * It was saved by the lwp itself, in prstop().
458  * Return non-zero if the instruction is valid.
459  */
460 int
461 prfetchinstr(klwp_t *lwp, ulong_t *ip)
462 {
463 	*ip = (ulong_t)(instr_t)lwp->lwp_pcb.pcb_instr;
464 	return (lwp->lwp_pcb.pcb_flags & INSTR_VALID);
465 }
466 
467 /*
468  * Called from trap() when a load or store instruction
469  * falls in a watched page but is not a watchpoint.
470  * We emulate the instruction in the kernel.
471  */
472 /* ARGSUSED */
473 int
474 pr_watch_emul(struct regs *rp, caddr_t addr, enum seg_rw rw)
475 {
476 #ifdef SOMEDAY
477 	int res;
478 	proc_t *p = curproc;
479 	char *badaddr = (caddr_t)(-1);
480 	int mapped;
481 
482 	/* prevent recursive calls to pr_watch_emul() */
483 	ASSERT(!(curthread->t_flag & T_WATCHPT));
484 	curthread->t_flag |= T_WATCHPT;
485 
486 	watch_disable_addr(addr, 8, rw);
487 	res = do_unaligned(rp, &badaddr);
488 	watch_enable_addr(addr, 8, rw);
489 
490 	curthread->t_flag &= ~T_WATCHPT;
491 	if (res == SIMU_SUCCESS) {
492 		/* adjust the pc */
493 		return (1);
494 	}
495 #endif
496 	return (0);
497 }
498 
499 /*
500  * Return the number of active entries in the local descriptor table.
501  */
502 int
503 prnldt(proc_t *p)
504 {
505 	int limit, i, n;
506 	user_desc_t *udp;
507 
508 	ASSERT(MUTEX_HELD(&p->p_ldtlock));
509 
510 	/*
511 	 * Currently 64 bit processes cannot have private LDTs.
512 	 */
513 	ASSERT(p->p_model != DATAMODEL_LP64 || p->p_ldt == NULL);
514 
515 	if (p->p_ldt == NULL)
516 		return (0);
517 	n = 0;
518 	limit = p->p_ldtlimit;
519 	ASSERT(limit >= 0 && limit < MAXNLDT);
520 
521 	/*
522 	 * Count all present user descriptors.
523 	 */
524 	for (i = LDT_UDBASE, udp = &p->p_ldt[i]; i <= limit; i++, udp++)
525 		if (udp->usd_type != 0 || udp->usd_dpl != 0 || udp->usd_p != 0)
526 			n++;
527 	return (n);
528 }
529 
530 /*
531  * Fetch the active entries from the local descriptor table.
532  */
533 void
534 prgetldt(proc_t *p, struct ssd *ssd)
535 {
536 	int i, limit;
537 	user_desc_t *udp;
538 
539 	ASSERT(MUTEX_HELD(&p->p_ldtlock));
540 
541 	if (p->p_ldt == NULL)
542 		return;
543 
544 	limit = p->p_ldtlimit;
545 	ASSERT(limit >= 0 && limit < MAXNLDT);
546 
547 	/*
548 	 * All present user descriptors.
549 	 */
550 	for (i = LDT_UDBASE, udp = &p->p_ldt[i]; i <= limit; i++, udp++)
551 		if (udp->usd_type != 0 || udp->usd_dpl != 0 ||
552 		    udp->usd_p != 0)
553 			usd_to_ssd(udp, ssd++, SEL_LDT(i));
554 }
555