1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. 29 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/thread.h> 35 #include <sys/sysmacros.h> 36 #include <sys/signal.h> 37 #include <sys/cred.h> 38 #include <sys/priv.h> 39 #include <sys/user.h> 40 #include <sys/file.h> 41 #include <sys/errno.h> 42 #include <sys/vnode.h> 43 #include <sys/mode.h> 44 #include <sys/vfs.h> 45 #include <sys/mman.h> 46 #include <sys/kmem.h> 47 #include <sys/proc.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/systm.h> 51 #include <sys/elf.h> 52 #include <sys/vmsystm.h> 53 #include <sys/debug.h> 54 #include <sys/procfs.h> 55 #include <sys/regset.h> 56 #include <sys/auxv.h> 57 #include <sys/exec.h> 58 #include <sys/prsystm.h> 59 #include <sys/utsname.h> 60 #include <sys/zone.h> 61 #include <vm/as.h> 62 #include <vm/rm.h> 63 #include <sys/modctl.h> 64 #include <sys/systeminfo.h> 65 #include <sys/machelf.h> 66 #include <sys/sunddi.h> 67 #include "elf_impl.h" 68 #if defined(__i386) || defined(__i386_COMPAT) 69 #include <sys/sysi86.h> 70 #endif 71 72 void 73 setup_note_header(Phdr *v, proc_t *p) 74 { 75 int nlwp = p->p_lwpcnt; 76 int nzomb = p->p_zombcnt; 77 int nfd; 78 size_t size; 79 prcred_t *pcrp; 80 uf_info_t *fip; 81 uf_entry_t *ufp; 82 int fd; 83 84 fip = P_FINFO(p); 85 nfd = 0; 86 mutex_enter(&fip->fi_lock); 87 for (fd = 0; fd < fip->fi_nfiles; fd++) { 88 UF_ENTER(ufp, fip, fd); 89 if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0)) 90 nfd++; 91 UF_EXIT(ufp); 92 } 93 mutex_exit(&fip->fi_lock); 94 95 v[0].p_type = PT_NOTE; 96 v[0].p_flags = PF_R; 97 v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb + nfd)) 98 + roundup(sizeof (psinfo_t), sizeof (Word)) 99 + roundup(sizeof (pstatus_t), sizeof (Word)) 100 + roundup(prgetprivsize(), sizeof (Word)) 101 + roundup(priv_get_implinfo_size(), sizeof (Word)) 102 + roundup(strlen(platform) + 1, sizeof (Word)) 103 + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word)) 104 + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word)) 105 + roundup(sizeof (utsname), sizeof (Word)) 106 + roundup(sizeof (core_content_t), sizeof (Word)) 107 + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word)) 108 + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word)) 109 + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word)); 110 111 if (curproc->p_agenttp != NULL) { 112 v[0].p_filesz += sizeof (Note) + 113 roundup(sizeof (psinfo_t), sizeof (Word)); 114 } 115 116 size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1); 117 pcrp = kmem_alloc(size, KM_SLEEP); 118 prgetcred(p, pcrp); 119 if (pcrp->pr_ngroups != 0) { 120 v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) + 121 sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word)); 122 } else { 123 v[0].p_filesz += sizeof (Note) + 124 roundup(sizeof (prcred_t), sizeof (Word)); 125 } 126 kmem_free(pcrp, size); 127 128 129 #if defined(__i386) || defined(__i386_COMPAT) 130 mutex_enter(&p->p_ldtlock); 131 size = prnldt(p) * sizeof (struct ssd); 132 mutex_exit(&p->p_ldtlock); 133 if (size != 0) 134 v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word)); 135 #endif /* __i386 || __i386_COMPAT */ 136 137 if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0) 138 v[0].p_filesz += nlwp * sizeof (Note) 139 + nlwp * roundup(size, sizeof (Word)); 140 141 #if defined(__sparc) 142 /* 143 * Figure out the number and sizes of register windows. 144 */ 145 { 146 kthread_t *t = p->p_tlist; 147 do { 148 if ((size = prnwindows(ttolwp(t))) != 0) { 149 size = sizeof (gwindows_t) - 150 (SPARC_MAXREGWINDOW - size) * 151 sizeof (struct rwindow); 152 v[0].p_filesz += sizeof (Note) + 153 roundup(size, sizeof (Word)); 154 } 155 } while ((t = t->t_forw) != p->p_tlist); 156 } 157 /* 158 * Space for the Ancillary State Registers. 159 */ 160 if (p->p_model == DATAMODEL_LP64) 161 v[0].p_filesz += nlwp * sizeof (Note) 162 + nlwp * roundup(sizeof (asrset_t), sizeof (Word)); 163 #endif /* __sparc */ 164 } 165 166 int 167 write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset, 168 rlim64_t rlimit, cred_t *credp, core_content_t content) 169 { 170 union { 171 psinfo_t psinfo; 172 pstatus_t pstatus; 173 lwpsinfo_t lwpsinfo; 174 lwpstatus_t lwpstatus; 175 #if defined(__sparc) 176 gwindows_t gwindows; 177 asrset_t asrset; 178 #endif /* __sparc */ 179 char xregs[1]; 180 aux_entry_t auxv[__KERN_NAUXV_IMPL]; 181 prcred_t pcred; 182 prpriv_t ppriv; 183 priv_impl_info_t prinfo; 184 struct utsname uts; 185 } *bigwad; 186 187 size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0; 188 size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1); 189 size_t psize = prgetprivsize(); 190 size_t bigsize = MAX(psize, MAX(sizeof (*bigwad), 191 MAX(xregsize, crsize))); 192 193 priv_impl_info_t *prii; 194 195 lwpdir_t *ldp; 196 lwpent_t *lep; 197 kthread_t *t; 198 klwp_t *lwp; 199 user_t *up; 200 int i; 201 int nlwp; 202 int nzomb; 203 int error; 204 uchar_t oldsig; 205 uf_info_t *fip; 206 int fd; 207 vnode_t *vroot; 208 209 #if defined(__i386) || defined(__i386_COMPAT) 210 struct ssd *ssd; 211 size_t ssdsize; 212 #endif /* __i386 || __i386_COMPAT */ 213 214 bigsize = MAX(bigsize, priv_get_implinfo_size()); 215 216 bigwad = kmem_alloc(bigsize, KM_SLEEP); 217 218 /* 219 * The order of the elfnote entries should be same here 220 * and in the gcore(1) command. Synchronization is 221 * needed between the kernel and gcore(1). 222 */ 223 224 /* 225 * Get the psinfo, and set the wait status to indicate that a core was 226 * dumped. We have to forge this since p->p_wcode is not set yet. 227 */ 228 mutex_enter(&p->p_lock); 229 prgetpsinfo(p, &bigwad->psinfo); 230 mutex_exit(&p->p_lock); 231 bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig); 232 233 error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo), 234 (caddr_t)&bigwad->psinfo, rlimit, credp); 235 if (error) 236 goto done; 237 238 /* 239 * Modify t_whystop and lwp_cursig so it appears that the current LWP 240 * is stopped after faulting on the signal that caused the core dump. 241 * As a result, prgetstatus() will record that signal, the saved 242 * lwp_siginfo, and its signal handler in the core file status. We 243 * restore lwp_cursig in case a subsequent signal was received while 244 * dumping core. 245 */ 246 mutex_enter(&p->p_lock); 247 lwp = ttolwp(curthread); 248 249 oldsig = lwp->lwp_cursig; 250 lwp->lwp_cursig = (uchar_t)sig; 251 curthread->t_whystop = PR_FAULTED; 252 253 prgetstatus(p, &bigwad->pstatus, p->p_zone); 254 bigwad->pstatus.pr_lwp.pr_why = 0; 255 256 curthread->t_whystop = 0; 257 lwp->lwp_cursig = oldsig; 258 mutex_exit(&p->p_lock); 259 260 error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus), 261 (caddr_t)&bigwad->pstatus, rlimit, credp); 262 if (error) 263 goto done; 264 265 error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1, 266 platform, rlimit, credp); 267 if (error) 268 goto done; 269 270 up = PTOU(p); 271 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 272 bigwad->auxv[i].a_type = up->u_auxv[i].a_type; 273 bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val; 274 } 275 error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv), 276 (caddr_t)bigwad->auxv, rlimit, credp); 277 if (error) 278 goto done; 279 280 bcopy(&utsname, &bigwad->uts, sizeof (struct utsname)); 281 if (!INGLOBALZONE(p)) { 282 bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename, 283 _SYS_NMLN); 284 } 285 error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname), 286 (caddr_t)&bigwad->uts, rlimit, credp); 287 if (error) 288 goto done; 289 290 prgetcred(p, &bigwad->pcred); 291 292 if (bigwad->pcred.pr_ngroups != 0) { 293 crsize = sizeof (prcred_t) + 294 sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1); 295 } else 296 crsize = sizeof (prcred_t); 297 298 error = elfnote(vp, &offset, NT_PRCRED, crsize, 299 (caddr_t)&bigwad->pcred, rlimit, credp); 300 if (error) 301 goto done; 302 303 error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t), 304 (caddr_t)&content, rlimit, credp); 305 if (error) 306 goto done; 307 308 prgetpriv(p, &bigwad->ppriv); 309 310 error = elfnote(vp, &offset, NT_PRPRIV, psize, 311 (caddr_t)&bigwad->ppriv, rlimit, credp); 312 if (error) 313 goto done; 314 315 prii = priv_hold_implinfo(); 316 error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(), 317 (caddr_t)prii, rlimit, credp); 318 priv_release_implinfo(); 319 if (error) 320 goto done; 321 322 /* zone can't go away as long as process exists */ 323 error = elfnote(vp, &offset, NT_ZONENAME, 324 strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name, 325 rlimit, credp); 326 if (error) 327 goto done; 328 329 330 /* open file table */ 331 vroot = PTOU(p)->u_rdir; 332 if (vroot == NULL) 333 vroot = rootdir; 334 335 VN_HOLD(vroot); 336 337 fip = P_FINFO(p); 338 339 for (fd = 0; fd < fip->fi_nfiles; fd++) { 340 uf_entry_t *ufp; 341 vnode_t *fvp; 342 struct file *fp; 343 vattr_t vattr; 344 prfdinfo_t fdinfo; 345 346 bzero(&fdinfo, sizeof (fdinfo)); 347 348 mutex_enter(&fip->fi_lock); 349 UF_ENTER(ufp, fip, fd); 350 if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) { 351 UF_EXIT(ufp); 352 mutex_exit(&fip->fi_lock); 353 continue; 354 } 355 356 fdinfo.pr_fd = fd; 357 fdinfo.pr_fdflags = ufp->uf_flag; 358 fdinfo.pr_fileflags = fp->f_flag2; 359 fdinfo.pr_fileflags <<= 16; 360 fdinfo.pr_fileflags |= fp->f_flag; 361 if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0) 362 fdinfo.pr_fileflags += FOPEN; 363 fdinfo.pr_offset = fp->f_offset; 364 365 366 fvp = fp->f_vnode; 367 VN_HOLD(fvp); 368 UF_EXIT(ufp); 369 mutex_exit(&fip->fi_lock); 370 371 /* 372 * There are some vnodes that have no corresponding 373 * path. Its reasonable for this to fail, in which 374 * case the path will remain an empty string. 375 */ 376 (void) vnodetopath(vroot, fvp, fdinfo.pr_path, 377 sizeof (fdinfo.pr_path), credp); 378 379 error = VOP_GETATTR(fvp, &vattr, 0, credp, NULL); 380 if (error != 0) { 381 VN_RELE(fvp); 382 VN_RELE(vroot); 383 goto done; 384 } 385 386 if (fvp->v_type == VSOCK) 387 fdinfo.pr_fileflags |= sock_getfasync(fvp); 388 389 VN_RELE(fvp); 390 391 /* 392 * This logic mirrors fstat(), which we cannot use 393 * directly, as it calls copyout(). 394 */ 395 fdinfo.pr_major = getmajor(vattr.va_fsid); 396 fdinfo.pr_minor = getminor(vattr.va_fsid); 397 fdinfo.pr_ino = (ino64_t)vattr.va_nodeid; 398 fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; 399 fdinfo.pr_uid = vattr.va_uid; 400 fdinfo.pr_gid = vattr.va_gid; 401 fdinfo.pr_rmajor = getmajor(vattr.va_rdev); 402 fdinfo.pr_rminor = getminor(vattr.va_rdev); 403 fdinfo.pr_size = (off64_t)vattr.va_size; 404 405 error = elfnote(vp, &offset, NT_FDINFO, 406 sizeof (fdinfo), &fdinfo, rlimit, credp); 407 if (error) { 408 goto done; 409 } 410 } 411 412 #if defined(__i386) || defined(__i386_COMPAT) 413 mutex_enter(&p->p_ldtlock); 414 ssdsize = prnldt(p) * sizeof (struct ssd); 415 if (ssdsize != 0) { 416 ssd = kmem_alloc(ssdsize, KM_SLEEP); 417 prgetldt(p, ssd); 418 error = elfnote(vp, &offset, NT_LDT, ssdsize, 419 (caddr_t)ssd, rlimit, credp); 420 kmem_free(ssd, ssdsize); 421 } 422 mutex_exit(&p->p_ldtlock); 423 if (error) 424 goto done; 425 #endif /* __i386 || defined(__i386_COMPAT) */ 426 427 nlwp = p->p_lwpcnt; 428 nzomb = p->p_zombcnt; 429 /* for each entry in the lwp directory ... */ 430 for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) { 431 432 if ((lep = ldp->ld_entry) == NULL) /* empty slot */ 433 continue; 434 435 if ((t = lep->le_thread) != NULL) { /* active lwp */ 436 ASSERT(nlwp != 0); 437 nlwp--; 438 lwp = ttolwp(t); 439 mutex_enter(&p->p_lock); 440 prgetlwpsinfo(t, &bigwad->lwpsinfo); 441 mutex_exit(&p->p_lock); 442 } else { /* zombie lwp */ 443 ASSERT(nzomb != 0); 444 nzomb--; 445 bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo)); 446 bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid; 447 bigwad->lwpsinfo.pr_state = SZOMB; 448 bigwad->lwpsinfo.pr_sname = 'Z'; 449 bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start; 450 } 451 error = elfnote(vp, &offset, NT_LWPSINFO, 452 sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo, 453 rlimit, credp); 454 if (error) 455 goto done; 456 if (t == NULL) /* nothing more to do for a zombie */ 457 continue; 458 459 mutex_enter(&p->p_lock); 460 if (t == curthread) { 461 /* 462 * Modify t_whystop and lwp_cursig so it appears that 463 * the current LWP is stopped after faulting on the 464 * signal that caused the core dump. As a result, 465 * prgetlwpstatus() will record that signal, the saved 466 * lwp_siginfo, and its signal handler in the core file 467 * status. We restore lwp_cursig in case a subsequent 468 * signal was received while dumping core. 469 */ 470 oldsig = lwp->lwp_cursig; 471 lwp->lwp_cursig = (uchar_t)sig; 472 t->t_whystop = PR_FAULTED; 473 474 prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone); 475 bigwad->lwpstatus.pr_why = 0; 476 477 t->t_whystop = 0; 478 lwp->lwp_cursig = oldsig; 479 } else { 480 prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone); 481 } 482 mutex_exit(&p->p_lock); 483 error = elfnote(vp, &offset, NT_LWPSTATUS, 484 sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus, 485 rlimit, credp); 486 if (error) 487 goto done; 488 489 #if defined(__sparc) 490 /* 491 * Unspilled SPARC register windows. 492 */ 493 { 494 size_t size = prnwindows(lwp); 495 496 if (size != 0) { 497 size = sizeof (gwindows_t) - 498 (SPARC_MAXREGWINDOW - size) * 499 sizeof (struct rwindow); 500 prgetwindows(lwp, &bigwad->gwindows); 501 error = elfnote(vp, &offset, NT_GWINDOWS, 502 size, (caddr_t)&bigwad->gwindows, 503 rlimit, credp); 504 if (error) 505 goto done; 506 } 507 } 508 /* 509 * Ancillary State Registers. 510 */ 511 if (p->p_model == DATAMODEL_LP64) { 512 prgetasregs(lwp, bigwad->asrset); 513 error = elfnote(vp, &offset, NT_ASRS, 514 sizeof (asrset_t), (caddr_t)bigwad->asrset, 515 rlimit, credp); 516 if (error) 517 goto done; 518 } 519 #endif /* __sparc */ 520 521 if (xregsize) { 522 prgetprxregs(lwp, bigwad->xregs); 523 error = elfnote(vp, &offset, NT_PRXREG, 524 xregsize, bigwad->xregs, rlimit, credp); 525 if (error) 526 goto done; 527 } 528 529 if (t->t_lwp->lwp_spymaster != NULL) { 530 void *psaddr = t->t_lwp->lwp_spymaster; 531 #ifdef _ELF32_COMPAT 532 /* 533 * On a 64-bit kernel with 32-bit ELF compatibility, 534 * this file is compiled into two different objects: 535 * one is compiled normally, and the other is compiled 536 * with _ELF32_COMPAT set -- and therefore with a 537 * psinfo_t defined to be a psinfo32_t. However, the 538 * psinfo_t denoting our spymaster is always of the 539 * native type; if we are in the _ELF32_COMPAT case, 540 * we need to explicitly convert it. 541 */ 542 if (p->p_model == DATAMODEL_ILP32) { 543 psinfo_kto32(psaddr, &bigwad->psinfo); 544 psaddr = &bigwad->psinfo; 545 } 546 #endif 547 548 error = elfnote(vp, &offset, NT_SPYMASTER, 549 sizeof (psinfo_t), psaddr, rlimit, credp); 550 if (error) 551 goto done; 552 } 553 } 554 ASSERT(nlwp == 0); 555 556 done: 557 kmem_free(bigwad, bigsize); 558 return (error); 559 } 560