xref: /illumos-gate/usr/src/uts/common/exec/elf/elf_notes.c (revision 49b7860084dbba18bc00b29413d6182197f9fe93)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/priv.h>
39 #include <sys/user.h>
40 #include <sys/file.h>
41 #include <sys/errno.h>
42 #include <sys/vnode.h>
43 #include <sys/mode.h>
44 #include <sys/vfs.h>
45 #include <sys/mman.h>
46 #include <sys/kmem.h>
47 #include <sys/proc.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/systm.h>
51 #include <sys/elf.h>
52 #include <sys/vmsystm.h>
53 #include <sys/debug.h>
54 #include <sys/procfs.h>
55 #include <sys/regset.h>
56 #include <sys/auxv.h>
57 #include <sys/exec.h>
58 #include <sys/prsystm.h>
59 #include <sys/utsname.h>
60 #include <sys/zone.h>
61 #include <vm/as.h>
62 #include <vm/rm.h>
63 #include <sys/modctl.h>
64 #include <sys/systeminfo.h>
65 #include <sys/machelf.h>
66 #include <sys/sunddi.h>
67 #include "elf_impl.h"
68 #if defined(__i386) || defined(__i386_COMPAT)
69 #include <sys/sysi86.h>
70 #endif
71 
72 void
73 setup_note_header(Phdr *v, proc_t *p)
74 {
75 	int nlwp = p->p_lwpcnt;
76 	int nzomb = p->p_zombcnt;
77 	int nfd;
78 	size_t size;
79 	prcred_t *pcrp;
80 	uf_info_t *fip;
81 	uf_entry_t *ufp;
82 	int fd;
83 
84 	fip = P_FINFO(p);
85 	nfd = 0;
86 	mutex_enter(&fip->fi_lock);
87 	for (fd = 0; fd < fip->fi_nfiles; fd++) {
88 		UF_ENTER(ufp, fip, fd);
89 		if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0))
90 			nfd++;
91 		UF_EXIT(ufp);
92 	}
93 	mutex_exit(&fip->fi_lock);
94 
95 	v[0].p_type = PT_NOTE;
96 	v[0].p_flags = PF_R;
97 	v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb + nfd))
98 	    + roundup(sizeof (psinfo_t), sizeof (Word))
99 	    + roundup(sizeof (pstatus_t), sizeof (Word))
100 	    + roundup(prgetprivsize(), sizeof (Word))
101 	    + roundup(priv_get_implinfo_size(), sizeof (Word))
102 	    + roundup(strlen(platform) + 1, sizeof (Word))
103 	    + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word))
104 	    + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word))
105 	    + roundup(sizeof (utsname), sizeof (Word))
106 	    + roundup(sizeof (core_content_t), sizeof (Word))
107 	    + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word))
108 	    + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word))
109 	    + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word));
110 
111 	if (curproc->p_agenttp != NULL) {
112 		v[0].p_filesz += sizeof (Note) +
113 		    roundup(sizeof (psinfo_t), sizeof (Word));
114 	}
115 
116 	size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
117 	pcrp = kmem_alloc(size, KM_SLEEP);
118 	prgetcred(p, pcrp);
119 	if (pcrp->pr_ngroups != 0) {
120 		v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) +
121 		    sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word));
122 	} else {
123 		v[0].p_filesz += sizeof (Note) +
124 		    roundup(sizeof (prcred_t), sizeof (Word));
125 	}
126 	kmem_free(pcrp, size);
127 
128 
129 #if defined(__i386) || defined(__i386_COMPAT)
130 	mutex_enter(&p->p_ldtlock);
131 	size = prnldt(p) * sizeof (struct ssd);
132 	mutex_exit(&p->p_ldtlock);
133 	if (size != 0)
134 		v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word));
135 #endif	/* __i386 || __i386_COMPAT */
136 
137 	if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0)
138 		v[0].p_filesz += nlwp * sizeof (Note)
139 		    + nlwp * roundup(size, sizeof (Word));
140 
141 #if defined(__sparc)
142 	/*
143 	 * Figure out the number and sizes of register windows.
144 	 */
145 	{
146 		kthread_t *t = p->p_tlist;
147 		do {
148 			if ((size = prnwindows(ttolwp(t))) != 0) {
149 				size = sizeof (gwindows_t) -
150 				    (SPARC_MAXREGWINDOW - size) *
151 				    sizeof (struct rwindow);
152 				v[0].p_filesz += sizeof (Note) +
153 				    roundup(size, sizeof (Word));
154 			}
155 		} while ((t = t->t_forw) != p->p_tlist);
156 	}
157 	/*
158 	 * Space for the Ancillary State Registers.
159 	 */
160 	if (p->p_model == DATAMODEL_LP64)
161 		v[0].p_filesz += nlwp * sizeof (Note)
162 		    + nlwp * roundup(sizeof (asrset_t), sizeof (Word));
163 #endif /* __sparc */
164 }
165 
166 int
167 write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
168     rlim64_t rlimit, cred_t *credp, core_content_t content)
169 {
170 	union {
171 		psinfo_t	psinfo;
172 		pstatus_t	pstatus;
173 		lwpsinfo_t	lwpsinfo;
174 		lwpstatus_t	lwpstatus;
175 #if defined(__sparc)
176 		gwindows_t	gwindows;
177 		asrset_t	asrset;
178 #endif /* __sparc */
179 		char		xregs[1];
180 		aux_entry_t	auxv[__KERN_NAUXV_IMPL];
181 		prcred_t	pcred;
182 		prpriv_t	ppriv;
183 		priv_impl_info_t prinfo;
184 		struct utsname	uts;
185 	} *bigwad;
186 
187 	size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0;
188 	size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
189 	size_t psize = prgetprivsize();
190 	size_t bigsize = MAX(psize, MAX(sizeof (*bigwad),
191 	    MAX(xregsize, crsize)));
192 
193 	priv_impl_info_t *prii;
194 
195 	lwpdir_t *ldp;
196 	lwpent_t *lep;
197 	kthread_t *t;
198 	klwp_t *lwp;
199 	user_t *up;
200 	int i;
201 	int nlwp;
202 	int nzomb;
203 	int error;
204 	uchar_t oldsig;
205 	uf_info_t *fip;
206 	int fd;
207 	vnode_t *vroot;
208 
209 #if defined(__i386) || defined(__i386_COMPAT)
210 	struct ssd *ssd;
211 	size_t ssdsize;
212 #endif	/* __i386 || __i386_COMPAT */
213 
214 	bigsize = MAX(bigsize, priv_get_implinfo_size());
215 
216 	bigwad = kmem_alloc(bigsize, KM_SLEEP);
217 
218 	/*
219 	 * The order of the elfnote entries should be same here
220 	 * and in the gcore(1) command.  Synchronization is
221 	 * needed between the kernel and gcore(1).
222 	 */
223 
224 	/*
225 	 * Get the psinfo, and set the wait status to indicate that a core was
226 	 * dumped.  We have to forge this since p->p_wcode is not set yet.
227 	 */
228 	mutex_enter(&p->p_lock);
229 	prgetpsinfo(p, &bigwad->psinfo);
230 	mutex_exit(&p->p_lock);
231 	bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig);
232 
233 	error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo),
234 	    (caddr_t)&bigwad->psinfo, rlimit, credp);
235 	if (error)
236 		goto done;
237 
238 	/*
239 	 * Modify t_whystop and lwp_cursig so it appears that the current LWP
240 	 * is stopped after faulting on the signal that caused the core dump.
241 	 * As a result, prgetstatus() will record that signal, the saved
242 	 * lwp_siginfo, and its signal handler in the core file status.  We
243 	 * restore lwp_cursig in case a subsequent signal was received while
244 	 * dumping core.
245 	 */
246 	mutex_enter(&p->p_lock);
247 	lwp = ttolwp(curthread);
248 
249 	oldsig = lwp->lwp_cursig;
250 	lwp->lwp_cursig = (uchar_t)sig;
251 	curthread->t_whystop = PR_FAULTED;
252 
253 	prgetstatus(p, &bigwad->pstatus, p->p_zone);
254 	bigwad->pstatus.pr_lwp.pr_why = 0;
255 
256 	curthread->t_whystop = 0;
257 	lwp->lwp_cursig = oldsig;
258 	mutex_exit(&p->p_lock);
259 
260 	error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus),
261 	    (caddr_t)&bigwad->pstatus, rlimit, credp);
262 	if (error)
263 		goto done;
264 
265 	error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1,
266 	    platform, rlimit, credp);
267 	if (error)
268 		goto done;
269 
270 	up = PTOU(p);
271 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
272 		bigwad->auxv[i].a_type = up->u_auxv[i].a_type;
273 		bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val;
274 	}
275 	error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv),
276 	    (caddr_t)bigwad->auxv, rlimit, credp);
277 	if (error)
278 		goto done;
279 
280 	bcopy(&utsname, &bigwad->uts, sizeof (struct utsname));
281 	if (!INGLOBALZONE(p)) {
282 		bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename,
283 		    _SYS_NMLN);
284 	}
285 	error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname),
286 	    (caddr_t)&bigwad->uts, rlimit, credp);
287 	if (error)
288 		goto done;
289 
290 	prgetcred(p, &bigwad->pcred);
291 
292 	if (bigwad->pcred.pr_ngroups != 0) {
293 		crsize = sizeof (prcred_t) +
294 		    sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1);
295 	} else
296 		crsize = sizeof (prcred_t);
297 
298 	error = elfnote(vp, &offset, NT_PRCRED, crsize,
299 	    (caddr_t)&bigwad->pcred, rlimit, credp);
300 	if (error)
301 		goto done;
302 
303 	error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t),
304 	    (caddr_t)&content, rlimit, credp);
305 	if (error)
306 		goto done;
307 
308 	prgetpriv(p, &bigwad->ppriv);
309 
310 	error = elfnote(vp, &offset, NT_PRPRIV, psize,
311 	    (caddr_t)&bigwad->ppriv, rlimit, credp);
312 	if (error)
313 		goto done;
314 
315 	prii = priv_hold_implinfo();
316 	error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(),
317 	    (caddr_t)prii, rlimit, credp);
318 	priv_release_implinfo();
319 	if (error)
320 		goto done;
321 
322 	/* zone can't go away as long as process exists */
323 	error = elfnote(vp, &offset, NT_ZONENAME,
324 	    strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name,
325 	    rlimit, credp);
326 	if (error)
327 		goto done;
328 
329 
330 	/* open file table */
331 	vroot = PTOU(p)->u_rdir;
332 	if (vroot == NULL)
333 		vroot = rootdir;
334 
335 	VN_HOLD(vroot);
336 
337 	fip = P_FINFO(p);
338 
339 	for (fd = 0; fd < fip->fi_nfiles; fd++) {
340 		uf_entry_t *ufp;
341 		vnode_t *fvp;
342 		struct file *fp;
343 		vattr_t vattr;
344 		prfdinfo_t fdinfo;
345 
346 		bzero(&fdinfo, sizeof (fdinfo));
347 
348 		mutex_enter(&fip->fi_lock);
349 		UF_ENTER(ufp, fip, fd);
350 		if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) {
351 			UF_EXIT(ufp);
352 			mutex_exit(&fip->fi_lock);
353 			continue;
354 		}
355 
356 		fdinfo.pr_fd = fd;
357 		fdinfo.pr_fdflags = ufp->uf_flag;
358 		fdinfo.pr_fileflags = fp->f_flag2;
359 		fdinfo.pr_fileflags <<= 16;
360 		fdinfo.pr_fileflags |= fp->f_flag;
361 		if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0)
362 			fdinfo.pr_fileflags += FOPEN;
363 		fdinfo.pr_offset = fp->f_offset;
364 
365 
366 		fvp = fp->f_vnode;
367 		VN_HOLD(fvp);
368 		UF_EXIT(ufp);
369 		mutex_exit(&fip->fi_lock);
370 
371 		/*
372 		 * There are some vnodes that have no corresponding
373 		 * path.  Its reasonable for this to fail, in which
374 		 * case the path will remain an empty string.
375 		 */
376 		(void) vnodetopath(vroot, fvp, fdinfo.pr_path,
377 		    sizeof (fdinfo.pr_path), credp);
378 
379 		if (VOP_GETATTR(fvp, &vattr, 0, credp, NULL) != 0) {
380 			/*
381 			 * Try to write at least a subset of information
382 			 */
383 			fdinfo.pr_major = 0;
384 			fdinfo.pr_minor = 0;
385 			fdinfo.pr_ino = 0;
386 			fdinfo.pr_mode = 0;
387 			fdinfo.pr_uid = (uid_t)-1;
388 			fdinfo.pr_gid = (gid_t)-1;
389 			fdinfo.pr_rmajor = 0;
390 			fdinfo.pr_rminor = 0;
391 			fdinfo.pr_size = -1;
392 
393 			error = elfnote(vp, &offset, NT_FDINFO,
394 			    sizeof (fdinfo), &fdinfo, rlimit, credp);
395 			VN_RELE(fvp);
396 			VN_RELE(vroot);
397 			if (error)
398 				goto done;
399 			continue;
400 		}
401 
402 		if (fvp->v_type == VSOCK)
403 			fdinfo.pr_fileflags |= sock_getfasync(fvp);
404 
405 		VN_RELE(fvp);
406 
407 		/*
408 		 * This logic mirrors fstat(), which we cannot use
409 		 * directly, as it calls copyout().
410 		 */
411 		fdinfo.pr_major = getmajor(vattr.va_fsid);
412 		fdinfo.pr_minor = getminor(vattr.va_fsid);
413 		fdinfo.pr_ino = (ino64_t)vattr.va_nodeid;
414 		fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
415 		fdinfo.pr_uid = vattr.va_uid;
416 		fdinfo.pr_gid = vattr.va_gid;
417 		fdinfo.pr_rmajor = getmajor(vattr.va_rdev);
418 		fdinfo.pr_rminor = getminor(vattr.va_rdev);
419 		fdinfo.pr_size = (off64_t)vattr.va_size;
420 
421 		error = elfnote(vp, &offset, NT_FDINFO,
422 		    sizeof (fdinfo), &fdinfo, rlimit, credp);
423 		if (error) {
424 			VN_RELE(vroot);
425 			goto done;
426 		}
427 	}
428 
429 	VN_RELE(vroot);
430 
431 #if defined(__i386) || defined(__i386_COMPAT)
432 	mutex_enter(&p->p_ldtlock);
433 	ssdsize = prnldt(p) * sizeof (struct ssd);
434 	if (ssdsize != 0) {
435 		ssd = kmem_alloc(ssdsize, KM_SLEEP);
436 		prgetldt(p, ssd);
437 		error = elfnote(vp, &offset, NT_LDT, ssdsize,
438 		    (caddr_t)ssd, rlimit, credp);
439 		kmem_free(ssd, ssdsize);
440 	}
441 	mutex_exit(&p->p_ldtlock);
442 	if (error)
443 		goto done;
444 #endif	/* __i386 || defined(__i386_COMPAT) */
445 
446 	nlwp = p->p_lwpcnt;
447 	nzomb = p->p_zombcnt;
448 	/* for each entry in the lwp directory ... */
449 	for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) {
450 
451 		if ((lep = ldp->ld_entry) == NULL)	/* empty slot */
452 			continue;
453 
454 		if ((t = lep->le_thread) != NULL) {	/* active lwp */
455 			ASSERT(nlwp != 0);
456 			nlwp--;
457 			lwp = ttolwp(t);
458 			mutex_enter(&p->p_lock);
459 			prgetlwpsinfo(t, &bigwad->lwpsinfo);
460 			mutex_exit(&p->p_lock);
461 		} else {				/* zombie lwp */
462 			ASSERT(nzomb != 0);
463 			nzomb--;
464 			bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo));
465 			bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid;
466 			bigwad->lwpsinfo.pr_state = SZOMB;
467 			bigwad->lwpsinfo.pr_sname = 'Z';
468 			bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start;
469 		}
470 		error = elfnote(vp, &offset, NT_LWPSINFO,
471 		    sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo,
472 		    rlimit, credp);
473 		if (error)
474 			goto done;
475 		if (t == NULL)		/* nothing more to do for a zombie */
476 			continue;
477 
478 		mutex_enter(&p->p_lock);
479 		if (t == curthread) {
480 			/*
481 			 * Modify t_whystop and lwp_cursig so it appears that
482 			 * the current LWP is stopped after faulting on the
483 			 * signal that caused the core dump.  As a result,
484 			 * prgetlwpstatus() will record that signal, the saved
485 			 * lwp_siginfo, and its signal handler in the core file
486 			 * status.  We restore lwp_cursig in case a subsequent
487 			 * signal was received while dumping core.
488 			 */
489 			oldsig = lwp->lwp_cursig;
490 			lwp->lwp_cursig = (uchar_t)sig;
491 			t->t_whystop = PR_FAULTED;
492 
493 			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
494 			bigwad->lwpstatus.pr_why = 0;
495 
496 			t->t_whystop = 0;
497 			lwp->lwp_cursig = oldsig;
498 		} else {
499 			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
500 		}
501 		mutex_exit(&p->p_lock);
502 		error = elfnote(vp, &offset, NT_LWPSTATUS,
503 		    sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus,
504 		    rlimit, credp);
505 		if (error)
506 			goto done;
507 
508 #if defined(__sparc)
509 		/*
510 		 * Unspilled SPARC register windows.
511 		 */
512 		{
513 			size_t size = prnwindows(lwp);
514 
515 			if (size != 0) {
516 				size = sizeof (gwindows_t) -
517 				    (SPARC_MAXREGWINDOW - size) *
518 				    sizeof (struct rwindow);
519 				prgetwindows(lwp, &bigwad->gwindows);
520 				error = elfnote(vp, &offset, NT_GWINDOWS,
521 				    size, (caddr_t)&bigwad->gwindows,
522 				    rlimit, credp);
523 				if (error)
524 					goto done;
525 			}
526 		}
527 		/*
528 		 * Ancillary State Registers.
529 		 */
530 		if (p->p_model == DATAMODEL_LP64) {
531 			prgetasregs(lwp, bigwad->asrset);
532 			error = elfnote(vp, &offset, NT_ASRS,
533 			    sizeof (asrset_t), (caddr_t)bigwad->asrset,
534 			    rlimit, credp);
535 			if (error)
536 				goto done;
537 		}
538 #endif /* __sparc */
539 
540 		if (xregsize) {
541 			prgetprxregs(lwp, bigwad->xregs);
542 			error = elfnote(vp, &offset, NT_PRXREG,
543 			    xregsize, bigwad->xregs, rlimit, credp);
544 			if (error)
545 				goto done;
546 		}
547 
548 		if (t->t_lwp->lwp_spymaster != NULL) {
549 			void *psaddr = t->t_lwp->lwp_spymaster;
550 #ifdef _ELF32_COMPAT
551 			/*
552 			 * On a 64-bit kernel with 32-bit ELF compatibility,
553 			 * this file is compiled into two different objects:
554 			 * one is compiled normally, and the other is compiled
555 			 * with _ELF32_COMPAT set -- and therefore with a
556 			 * psinfo_t defined to be a psinfo32_t.  However, the
557 			 * psinfo_t denoting our spymaster is always of the
558 			 * native type; if we are in the _ELF32_COMPAT case,
559 			 * we need to explicitly convert it.
560 			 */
561 			if (p->p_model == DATAMODEL_ILP32) {
562 				psinfo_kto32(psaddr, &bigwad->psinfo);
563 				psaddr = &bigwad->psinfo;
564 			}
565 #endif
566 
567 			error = elfnote(vp, &offset, NT_SPYMASTER,
568 			    sizeof (psinfo_t), psaddr, rlimit, credp);
569 			if (error)
570 				goto done;
571 		}
572 	}
573 	ASSERT(nlwp == 0);
574 
575 done:
576 	kmem_free(bigwad, bigsize);
577 	return (error);
578 }
579