xref: /titanic_52/usr/src/uts/common/exec/elf/elf_notes.c (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/priv.h>
39 #include <sys/user.h>
40 #include <sys/file.h>
41 #include <sys/errno.h>
42 #include <sys/vnode.h>
43 #include <sys/mode.h>
44 #include <sys/vfs.h>
45 #include <sys/mman.h>
46 #include <sys/kmem.h>
47 #include <sys/proc.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/systm.h>
51 #include <sys/elf.h>
52 #include <sys/vmsystm.h>
53 #include <sys/debug.h>
54 #include <sys/procfs.h>
55 #include <sys/regset.h>
56 #include <sys/auxv.h>
57 #include <sys/exec.h>
58 #include <sys/prsystm.h>
59 #include <sys/utsname.h>
60 #include <sys/zone.h>
61 #include <vm/as.h>
62 #include <vm/rm.h>
63 #include <sys/modctl.h>
64 #include <sys/systeminfo.h>
65 #include <sys/machelf.h>
66 #include <sys/sunddi.h>
67 #include "elf_impl.h"
68 #if defined(__i386) || defined(__i386_COMPAT)
69 #include <sys/sysi86.h>
70 #endif
71 
72 void
73 setup_note_header(Phdr *v, proc_t *p)
74 {
75 	int nlwp = p->p_lwpcnt;
76 	int nzomb = p->p_zombcnt;
77 	int nfd;
78 	size_t size;
79 	prcred_t *pcrp;
80 	uf_info_t *fip;
81 	uf_entry_t *ufp;
82 	int fd;
83 
84 	fip = P_FINFO(p);
85 	nfd = 0;
86 	mutex_enter(&fip->fi_lock);
87 	for (fd = 0; fd < fip->fi_nfiles; fd++) {
88 		UF_ENTER(ufp, fip, fd);
89 		if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0))
90 			nfd++;
91 		UF_EXIT(ufp);
92 	}
93 	mutex_exit(&fip->fi_lock);
94 
95 	v[0].p_type = PT_NOTE;
96 	v[0].p_flags = PF_R;
97 	v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb + nfd))
98 	    + roundup(sizeof (psinfo_t), sizeof (Word))
99 	    + roundup(sizeof (pstatus_t), sizeof (Word))
100 	    + roundup(prgetprivsize(), sizeof (Word))
101 	    + roundup(priv_get_implinfo_size(), sizeof (Word))
102 	    + roundup(strlen(platform) + 1, sizeof (Word))
103 	    + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word))
104 	    + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word))
105 	    + roundup(sizeof (utsname), sizeof (Word))
106 	    + roundup(sizeof (core_content_t), sizeof (Word))
107 	    + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word))
108 	    + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word))
109 	    + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word));
110 
111 	if (curproc->p_agenttp != NULL) {
112 		v[0].p_filesz += sizeof (Note) +
113 		    roundup(sizeof (psinfo_t), sizeof (Word));
114 	}
115 
116 	size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
117 	pcrp = kmem_alloc(size, KM_SLEEP);
118 	prgetcred(p, pcrp);
119 	if (pcrp->pr_ngroups != 0) {
120 		v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) +
121 		    sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word));
122 	} else {
123 		v[0].p_filesz += sizeof (Note) +
124 		    roundup(sizeof (prcred_t), sizeof (Word));
125 	}
126 	kmem_free(pcrp, size);
127 
128 
129 #if defined(__i386) || defined(__i386_COMPAT)
130 	mutex_enter(&p->p_ldtlock);
131 	size = prnldt(p) * sizeof (struct ssd);
132 	mutex_exit(&p->p_ldtlock);
133 	if (size != 0)
134 		v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word));
135 #endif	/* __i386 || __i386_COMPAT */
136 
137 	if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0)
138 		v[0].p_filesz += nlwp * sizeof (Note)
139 		    + nlwp * roundup(size, sizeof (Word));
140 
141 #if defined(__sparc)
142 	/*
143 	 * Figure out the number and sizes of register windows.
144 	 */
145 	{
146 		kthread_t *t = p->p_tlist;
147 		do {
148 			if ((size = prnwindows(ttolwp(t))) != 0) {
149 				size = sizeof (gwindows_t) -
150 				    (SPARC_MAXREGWINDOW - size) *
151 				    sizeof (struct rwindow);
152 				v[0].p_filesz += sizeof (Note) +
153 				    roundup(size, sizeof (Word));
154 			}
155 		} while ((t = t->t_forw) != p->p_tlist);
156 	}
157 	/*
158 	 * Space for the Ancillary State Registers.
159 	 */
160 	if (p->p_model == DATAMODEL_LP64)
161 		v[0].p_filesz += nlwp * sizeof (Note)
162 		    + nlwp * roundup(sizeof (asrset_t), sizeof (Word));
163 #endif /* __sparc */
164 }
165 
166 int
167 write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
168     rlim64_t rlimit, cred_t *credp, core_content_t content)
169 {
170 	union {
171 		psinfo_t	psinfo;
172 		pstatus_t	pstatus;
173 		lwpsinfo_t	lwpsinfo;
174 		lwpstatus_t	lwpstatus;
175 #if defined(__sparc)
176 		gwindows_t	gwindows;
177 		asrset_t	asrset;
178 #endif /* __sparc */
179 		char		xregs[1];
180 		aux_entry_t	auxv[__KERN_NAUXV_IMPL];
181 		prcred_t	pcred;
182 		prpriv_t	ppriv;
183 		priv_impl_info_t prinfo;
184 		struct utsname	uts;
185 	} *bigwad;
186 
187 	size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0;
188 	size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
189 	size_t psize = prgetprivsize();
190 	size_t bigsize = MAX(psize, MAX(sizeof (*bigwad),
191 	    MAX(xregsize, crsize)));
192 
193 	priv_impl_info_t *prii;
194 
195 	lwpdir_t *ldp;
196 	lwpent_t *lep;
197 	kthread_t *t;
198 	klwp_t *lwp;
199 	user_t *up;
200 	int i;
201 	int nlwp;
202 	int nzomb;
203 	int error;
204 	uchar_t oldsig;
205 	uf_info_t *fip;
206 	int fd;
207 	vnode_t *vroot;
208 
209 #if defined(__i386) || defined(__i386_COMPAT)
210 	struct ssd *ssd;
211 	size_t ssdsize;
212 #endif	/* __i386 || __i386_COMPAT */
213 
214 	bigsize = MAX(bigsize, priv_get_implinfo_size());
215 
216 	bigwad = kmem_alloc(bigsize, KM_SLEEP);
217 
218 	/*
219 	 * The order of the elfnote entries should be same here
220 	 * and in the gcore(1) command.  Synchronization is
221 	 * needed between the kernel and gcore(1).
222 	 */
223 
224 	/*
225 	 * Get the psinfo, and set the wait status to indicate that a core was
226 	 * dumped.  We have to forge this since p->p_wcode is not set yet.
227 	 */
228 	mutex_enter(&p->p_lock);
229 	prgetpsinfo(p, &bigwad->psinfo);
230 	mutex_exit(&p->p_lock);
231 	bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig);
232 
233 	error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo),
234 	    (caddr_t)&bigwad->psinfo, rlimit, credp);
235 	if (error)
236 		goto done;
237 
238 	/*
239 	 * Modify t_whystop and lwp_cursig so it appears that the current LWP
240 	 * is stopped after faulting on the signal that caused the core dump.
241 	 * As a result, prgetstatus() will record that signal, the saved
242 	 * lwp_siginfo, and its signal handler in the core file status.  We
243 	 * restore lwp_cursig in case a subsequent signal was received while
244 	 * dumping core.
245 	 */
246 	mutex_enter(&p->p_lock);
247 	lwp = ttolwp(curthread);
248 
249 	oldsig = lwp->lwp_cursig;
250 	lwp->lwp_cursig = (uchar_t)sig;
251 	curthread->t_whystop = PR_FAULTED;
252 
253 	prgetstatus(p, &bigwad->pstatus, p->p_zone);
254 	bigwad->pstatus.pr_lwp.pr_why = 0;
255 
256 	curthread->t_whystop = 0;
257 	lwp->lwp_cursig = oldsig;
258 	mutex_exit(&p->p_lock);
259 
260 	error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus),
261 	    (caddr_t)&bigwad->pstatus, rlimit, credp);
262 	if (error)
263 		goto done;
264 
265 	error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1,
266 	    platform, rlimit, credp);
267 	if (error)
268 		goto done;
269 
270 	up = PTOU(p);
271 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
272 		bigwad->auxv[i].a_type = up->u_auxv[i].a_type;
273 		bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val;
274 	}
275 	error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv),
276 	    (caddr_t)bigwad->auxv, rlimit, credp);
277 	if (error)
278 		goto done;
279 
280 	bcopy(&utsname, &bigwad->uts, sizeof (struct utsname));
281 	if (!INGLOBALZONE(p)) {
282 		bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename,
283 		    _SYS_NMLN);
284 	}
285 	error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname),
286 	    (caddr_t)&bigwad->uts, rlimit, credp);
287 	if (error)
288 		goto done;
289 
290 	prgetcred(p, &bigwad->pcred);
291 
292 	if (bigwad->pcred.pr_ngroups != 0) {
293 		crsize = sizeof (prcred_t) +
294 		    sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1);
295 	} else
296 		crsize = sizeof (prcred_t);
297 
298 	error = elfnote(vp, &offset, NT_PRCRED, crsize,
299 	    (caddr_t)&bigwad->pcred, rlimit, credp);
300 	if (error)
301 		goto done;
302 
303 	error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t),
304 	    (caddr_t)&content, rlimit, credp);
305 	if (error)
306 		goto done;
307 
308 	prgetpriv(p, &bigwad->ppriv);
309 
310 	error = elfnote(vp, &offset, NT_PRPRIV, psize,
311 	    (caddr_t)&bigwad->ppriv, rlimit, credp);
312 	if (error)
313 		goto done;
314 
315 	prii = priv_hold_implinfo();
316 	error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(),
317 	    (caddr_t)prii, rlimit, credp);
318 	priv_release_implinfo();
319 	if (error)
320 		goto done;
321 
322 	/* zone can't go away as long as process exists */
323 	error = elfnote(vp, &offset, NT_ZONENAME,
324 	    strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name,
325 	    rlimit, credp);
326 	if (error)
327 		goto done;
328 
329 
330 	/* open file table */
331 	vroot = PTOU(p)->u_rdir;
332 	if (vroot == NULL)
333 		vroot = rootdir;
334 
335 	VN_HOLD(vroot);
336 
337 	fip = P_FINFO(p);
338 
339 	for (fd = 0; fd < fip->fi_nfiles; fd++) {
340 		uf_entry_t *ufp;
341 		vnode_t *fvp;
342 		struct file *fp;
343 		vattr_t vattr;
344 		prfdinfo_t fdinfo;
345 
346 		bzero(&fdinfo, sizeof (fdinfo));
347 
348 		mutex_enter(&fip->fi_lock);
349 		UF_ENTER(ufp, fip, fd);
350 		if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) {
351 			UF_EXIT(ufp);
352 			mutex_exit(&fip->fi_lock);
353 			continue;
354 		}
355 
356 		fdinfo.pr_fd = fd;
357 		fdinfo.pr_fdflags = ufp->uf_flag;
358 		fdinfo.pr_fileflags = fp->f_flag2;
359 		fdinfo.pr_fileflags <<= 16;
360 		fdinfo.pr_fileflags |= fp->f_flag;
361 		if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0)
362 			fdinfo.pr_fileflags += FOPEN;
363 		fdinfo.pr_offset = fp->f_offset;
364 
365 
366 		fvp = fp->f_vnode;
367 		VN_HOLD(fvp);
368 		UF_EXIT(ufp);
369 		mutex_exit(&fip->fi_lock);
370 
371 		/*
372 		 * There are some vnodes that have no corresponding
373 		 * path.  Its reasonable for this to fail, in which
374 		 * case the path will remain an empty string.
375 		 */
376 		(void) vnodetopath(vroot, fvp, fdinfo.pr_path,
377 		    sizeof (fdinfo.pr_path), credp);
378 
379 		error = VOP_GETATTR(fvp, &vattr, 0, credp, NULL);
380 		if (error != 0) {
381 			VN_RELE(fvp);
382 			VN_RELE(vroot);
383 			goto done;
384 		}
385 
386 		if (fvp->v_type == VSOCK)
387 			fdinfo.pr_fileflags |= sock_getfasync(fvp);
388 
389 		VN_RELE(fvp);
390 
391 		/*
392 		 * This logic mirrors fstat(), which we cannot use
393 		 * directly, as it calls copyout().
394 		 */
395 		fdinfo.pr_major = getmajor(vattr.va_fsid);
396 		fdinfo.pr_minor = getminor(vattr.va_fsid);
397 		fdinfo.pr_ino = (ino64_t)vattr.va_nodeid;
398 		fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
399 		fdinfo.pr_uid = vattr.va_uid;
400 		fdinfo.pr_gid = vattr.va_gid;
401 		fdinfo.pr_rmajor = getmajor(vattr.va_rdev);
402 		fdinfo.pr_rminor = getminor(vattr.va_rdev);
403 		fdinfo.pr_size = (off64_t)vattr.va_size;
404 
405 		error = elfnote(vp, &offset, NT_FDINFO,
406 		    sizeof (fdinfo), &fdinfo, rlimit, credp);
407 		if (error) {
408 			VN_RELE(vroot);
409 			goto done;
410 		}
411 	}
412 
413 	VN_RELE(vroot);
414 
415 #if defined(__i386) || defined(__i386_COMPAT)
416 	mutex_enter(&p->p_ldtlock);
417 	ssdsize = prnldt(p) * sizeof (struct ssd);
418 	if (ssdsize != 0) {
419 		ssd = kmem_alloc(ssdsize, KM_SLEEP);
420 		prgetldt(p, ssd);
421 		error = elfnote(vp, &offset, NT_LDT, ssdsize,
422 		    (caddr_t)ssd, rlimit, credp);
423 		kmem_free(ssd, ssdsize);
424 	}
425 	mutex_exit(&p->p_ldtlock);
426 	if (error)
427 		goto done;
428 #endif	/* __i386 || defined(__i386_COMPAT) */
429 
430 	nlwp = p->p_lwpcnt;
431 	nzomb = p->p_zombcnt;
432 	/* for each entry in the lwp directory ... */
433 	for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) {
434 
435 		if ((lep = ldp->ld_entry) == NULL)	/* empty slot */
436 			continue;
437 
438 		if ((t = lep->le_thread) != NULL) {	/* active lwp */
439 			ASSERT(nlwp != 0);
440 			nlwp--;
441 			lwp = ttolwp(t);
442 			mutex_enter(&p->p_lock);
443 			prgetlwpsinfo(t, &bigwad->lwpsinfo);
444 			mutex_exit(&p->p_lock);
445 		} else {				/* zombie lwp */
446 			ASSERT(nzomb != 0);
447 			nzomb--;
448 			bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo));
449 			bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid;
450 			bigwad->lwpsinfo.pr_state = SZOMB;
451 			bigwad->lwpsinfo.pr_sname = 'Z';
452 			bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start;
453 		}
454 		error = elfnote(vp, &offset, NT_LWPSINFO,
455 		    sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo,
456 		    rlimit, credp);
457 		if (error)
458 			goto done;
459 		if (t == NULL)		/* nothing more to do for a zombie */
460 			continue;
461 
462 		mutex_enter(&p->p_lock);
463 		if (t == curthread) {
464 			/*
465 			 * Modify t_whystop and lwp_cursig so it appears that
466 			 * the current LWP is stopped after faulting on the
467 			 * signal that caused the core dump.  As a result,
468 			 * prgetlwpstatus() will record that signal, the saved
469 			 * lwp_siginfo, and its signal handler in the core file
470 			 * status.  We restore lwp_cursig in case a subsequent
471 			 * signal was received while dumping core.
472 			 */
473 			oldsig = lwp->lwp_cursig;
474 			lwp->lwp_cursig = (uchar_t)sig;
475 			t->t_whystop = PR_FAULTED;
476 
477 			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
478 			bigwad->lwpstatus.pr_why = 0;
479 
480 			t->t_whystop = 0;
481 			lwp->lwp_cursig = oldsig;
482 		} else {
483 			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
484 		}
485 		mutex_exit(&p->p_lock);
486 		error = elfnote(vp, &offset, NT_LWPSTATUS,
487 		    sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus,
488 		    rlimit, credp);
489 		if (error)
490 			goto done;
491 
492 #if defined(__sparc)
493 		/*
494 		 * Unspilled SPARC register windows.
495 		 */
496 		{
497 			size_t size = prnwindows(lwp);
498 
499 			if (size != 0) {
500 				size = sizeof (gwindows_t) -
501 				    (SPARC_MAXREGWINDOW - size) *
502 				    sizeof (struct rwindow);
503 				prgetwindows(lwp, &bigwad->gwindows);
504 				error = elfnote(vp, &offset, NT_GWINDOWS,
505 				    size, (caddr_t)&bigwad->gwindows,
506 				    rlimit, credp);
507 				if (error)
508 					goto done;
509 			}
510 		}
511 		/*
512 		 * Ancillary State Registers.
513 		 */
514 		if (p->p_model == DATAMODEL_LP64) {
515 			prgetasregs(lwp, bigwad->asrset);
516 			error = elfnote(vp, &offset, NT_ASRS,
517 			    sizeof (asrset_t), (caddr_t)bigwad->asrset,
518 			    rlimit, credp);
519 			if (error)
520 				goto done;
521 		}
522 #endif /* __sparc */
523 
524 		if (xregsize) {
525 			prgetprxregs(lwp, bigwad->xregs);
526 			error = elfnote(vp, &offset, NT_PRXREG,
527 			    xregsize, bigwad->xregs, rlimit, credp);
528 			if (error)
529 				goto done;
530 		}
531 
532 		if (t->t_lwp->lwp_spymaster != NULL) {
533 			void *psaddr = t->t_lwp->lwp_spymaster;
534 #ifdef _ELF32_COMPAT
535 			/*
536 			 * On a 64-bit kernel with 32-bit ELF compatibility,
537 			 * this file is compiled into two different objects:
538 			 * one is compiled normally, and the other is compiled
539 			 * with _ELF32_COMPAT set -- and therefore with a
540 			 * psinfo_t defined to be a psinfo32_t.  However, the
541 			 * psinfo_t denoting our spymaster is always of the
542 			 * native type; if we are in the _ELF32_COMPAT case,
543 			 * we need to explicitly convert it.
544 			 */
545 			if (p->p_model == DATAMODEL_ILP32) {
546 				psinfo_kto32(psaddr, &bigwad->psinfo);
547 				psaddr = &bigwad->psinfo;
548 			}
549 #endif
550 
551 			error = elfnote(vp, &offset, NT_SPYMASTER,
552 			    sizeof (psinfo_t), psaddr, rlimit, credp);
553 			if (error)
554 				goto done;
555 		}
556 	}
557 	ASSERT(nlwp == 0);
558 
559 done:
560 	kmem_free(bigwad, bigsize);
561 	return (error);
562 }
563