xref: /illumos-gate/usr/src/uts/common/os/core.c (revision efd4c9b63ad77503c101fc6c2ed8ba96c9d52964)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/time.h>
32 #include <sys/sysmacros.h>
33 #include <sys/proc.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/user.h>
37 #include <sys/utsname.h>
38 #include <sys/errno.h>
39 #include <sys/signal.h>
40 #include <sys/siginfo.h>
41 #include <sys/fault.h>
42 #include <sys/syscall.h>
43 #include <sys/ucontext.h>
44 #include <sys/prsystm.h>
45 #include <sys/vnode.h>
46 #include <sys/var.h>
47 #include <sys/file.h>
48 #include <sys/pathname.h>
49 #include <sys/vfs.h>
50 #include <sys/exec.h>
51 #include <sys/debug.h>
52 #include <sys/stack.h>
53 #include <sys/kmem.h>
54 #include <sys/schedctl.h>
55 #include <sys/core.h>
56 #include <sys/corectl.h>
57 #include <sys/cmn_err.h>
58 #include <vm/as.h>
59 #include <sys/rctl.h>
60 #include <sys/nbmlock.h>
61 #include <sys/stat.h>
62 #include <sys/zone.h>
63 #include <sys/contract/process_impl.h>
64 #include <sys/ddi.h>
65 
/*
 * A process running inside a zone may dump core in up to three places,
 * selected by its own per-process settings, by its zone's settings, and
 * by the global zone's settings.
 *
 * The per-zone and global-zone settings are both commonly called "global"
 * settings, since they apply to the system (or zone) as a whole rather
 * than to one particular process.
 */
enum core_types {
	CORE_PROC,	/* dump according to the per-process settings */
	CORE_ZONE,	/* dump according to the per-zone settings */
	CORE_GLOBAL	/* dump according to the global zone's settings */
};
79 
80 /*
81  * Log information about "global" core dumps to syslog.
82  */
83 static void
84 core_log(struct core_globals *cg, int error, const char *why, const char *path,
85     zoneid_t zoneid)
86 {
87 	proc_t *p = curproc;
88 	pid_t pid = p->p_pid;
89 	char *fn = PTOU(p)->u_comm;
90 
91 	if (!(cg->core_options & CC_GLOBAL_LOG))
92 		return;
93 
94 	if (path == NULL)
95 		zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s", fn, pid, why);
96 	else if (error == 0)
97 		zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s: %s", fn, pid,
98 		    why, path);
99 	else
100 		zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s, errno=%d: %s",
101 		    fn, pid, why, error, path);
102 }
103 
104 /*
105  * Private version of vn_remove().
106  * Refuse to unlink a directory or an unwritable file.
107  * Also allow the process to access files normally inaccessible due to
108  * chroot(2) or Zone limitations.
109  */
110 static int
111 remove_core_file(char *fp, enum core_types core_type)
112 {
113 	vnode_t *vp = NULL;		/* entry vnode */
114 	vnode_t *dvp;			/* ptr to parent dir vnode */
115 	vfs_t *dvfsp;
116 	int error;
117 	int in_crit = 0;
118 	pathname_t pn;			/* name of entry */
119 	vnode_t *startvp, *rootvp;
120 
121 	if ((error = pn_get(fp, UIO_SYSSPACE, &pn)) != 0)
122 		return (error);
123 	/*
124 	 * Determine what rootvp to use.
125 	 */
126 	if (core_type == CORE_PROC) {
127 		rootvp = (PTOU(curproc)->u_rdir == NULL ?
128 		    curproc->p_zone->zone_rootvp : PTOU(curproc)->u_rdir);
129 		startvp = (fp[0] == '/' ? rootvp : PTOU(curproc)->u_cdir);
130 	} else if (core_type == CORE_ZONE) {
131 		startvp = curproc->p_zone->zone_rootvp;
132 		rootvp = curproc->p_zone->zone_rootvp;
133 	} else {
134 		ASSERT(core_type == CORE_GLOBAL);
135 		startvp = rootdir;
136 		rootvp = rootdir;
137 	}
138 	VN_HOLD(startvp);
139 	if (rootvp != rootdir)
140 		VN_HOLD(rootvp);
141 	if ((error = lookuppnvp(&pn, NULL, NO_FOLLOW, &dvp, &vp, rootvp,
142 	    startvp, CRED())) != 0) {
143 		pn_free(&pn);
144 		return (error);
145 	}
146 	/*
147 	 * Succeed if there is no file.
148 	 * Fail if the file is not a regular file.
149 	 * Fail if the filesystem is mounted read-only.
150 	 * Fail if the file is not writeable.
151 	 * Fail if the file has NBMAND share reservations.
152 	 */
153 	if (vp == NULL)
154 		error = 0;
155 	else if (vp->v_type != VREG)
156 		error = EACCES;
157 	else if ((dvfsp = dvp->v_vfsp) != NULL &&
158 	    (dvfsp->vfs_flag & VFS_RDONLY))
159 		error = EROFS;
160 	else if ((error = VOP_ACCESS(vp, VWRITE, 0, CRED(), NULL)) == 0) {
161 		if (nbl_need_check(vp)) {
162 			nbl_start_crit(vp, RW_READER);
163 			in_crit = 1;
164 			if (nbl_share_conflict(vp, NBL_REMOVE, NULL)) {
165 				error = EACCES;
166 			}
167 		}
168 		if (!error) {
169 			error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
170 		}
171 	}
172 
173 	pn_free(&pn);
174 	if (vp != NULL) {
175 		if (in_crit)
176 			nbl_end_crit(vp);
177 		VN_RELE(vp);
178 	}
179 	VN_RELE(dvp);
180 	return (error);
181 }
182 
183 /*
184  * Create the core file in a location that may be normally inaccessible due
185  * to chroot(2) or Zone limitations.
186  */
187 static int
188 create_core_file(char *fp, enum core_types core_type, vnode_t **vpp)
189 {
190 	int error;
191 	mode_t perms = (S_IRUSR | S_IWUSR);
192 	pathname_t pn;
193 	char *file;
194 	vnode_t *vp;
195 	vnode_t *dvp;
196 	vattr_t vattr;
197 	cred_t *credp = CRED();
198 
199 	if (core_type == CORE_PROC) {
200 		file = fp;
201 		dvp = NULL;	/* regular lookup */
202 	} else {
203 		vnode_t *startvp, *rootvp;
204 
205 		ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
206 		/*
207 		 * This is tricky because we want to dump the core in
208 		 * a location which may normally be inaccessible
209 		 * to us (due to chroot(2) limitations, or zone
210 		 * membership), and hence need to overcome u_rdir
211 		 * restrictions.  The basic idea is to separate
212 		 * the path from the filename, lookup the
213 		 * pathname separately (starting from the global
214 		 * zone's root directory), and then open the
215 		 * file starting at the directory vnode.
216 		 */
217 		if (error = pn_get(fp, UIO_SYSSPACE, &pn))
218 			return (error);
219 
220 		if (core_type == CORE_ZONE) {
221 			startvp = rootvp = curproc->p_zone->zone_rootvp;
222 		} else {
223 			startvp = rootvp = rootdir;
224 		}
225 		/*
226 		 * rootvp and startvp will be VN_RELE()'d by lookuppnvp() if
227 		 * necessary.
228 		 */
229 		VN_HOLD(startvp);
230 		if (rootvp != rootdir)
231 			VN_HOLD(rootvp);
232 		/*
233 		 * Do a lookup on the full path, ignoring the actual file, but
234 		 * finding the vnode for the directory.  It's OK if the file
235 		 * doesn't exist -- it most likely won't since we just removed
236 		 * it.
237 		 */
238 		error = lookuppnvp(&pn, NULL, FOLLOW, &dvp, NULLVPP,
239 		    rootvp, startvp, credp);
240 		pn_free(&pn);
241 		if (error != 0)
242 			return (error);
243 		ASSERT(dvp != NULL);
244 		/*
245 		 * Now find the final component in the path (ie, the name of
246 		 * the core file).
247 		 */
248 		if (error = pn_get(fp, UIO_SYSSPACE, &pn)) {
249 			VN_RELE(dvp);
250 			return (error);
251 		}
252 		pn_setlast(&pn);
253 		file = pn.pn_path;
254 	}
255 	error =  vn_openat(file, UIO_SYSSPACE,
256 	    FWRITE | FTRUNC | FEXCL | FCREAT | FOFFMAX,
257 	    perms, &vp, CRCREAT, PTOU(curproc)->u_cmask, dvp, -1);
258 	if (core_type != CORE_PROC) {
259 		VN_RELE(dvp);
260 		pn_free(&pn);
261 	}
262 	/*
263 	 * Don't dump a core file owned by "nobody".
264 	 */
265 	vattr.va_mask = AT_UID;
266 	if (error == 0 &&
267 	    (VOP_GETATTR(vp, &vattr, 0, credp, NULL) != 0 ||
268 	    vattr.va_uid != crgetuid(credp))) {
269 		(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0,
270 		    credp, NULL);
271 		VN_RELE(vp);
272 		(void) remove_core_file(fp, core_type);
273 		error = EACCES;
274 	}
275 	*vpp = vp;
276 	return (error);
277 }
278 
279 /*
280  * Install the specified held cred into the process, and return a pointer to
281  * the held cred which was previously the value of p->p_cred.
282  */
283 static cred_t *
284 set_cred(proc_t *p, cred_t *newcr)
285 {
286 	cred_t *oldcr;
287 	uid_t olduid, newuid;
288 
289 	/*
290 	 * Place a hold on the existing cred, and then install the new
291 	 * cred into the proc structure.
292 	 */
293 	mutex_enter(&p->p_crlock);
294 	oldcr = p->p_cred;
295 	crhold(oldcr);
296 	p->p_cred = newcr;
297 	mutex_exit(&p->p_crlock);
298 
299 	ASSERT(crgetzoneid(oldcr) == crgetzoneid(newcr));
300 
301 	/*
302 	 * If the real uid is changing, keep the per-user process
303 	 * counts accurate.
304 	 */
305 	olduid = crgetruid(oldcr);
306 	newuid = crgetruid(newcr);
307 	if (olduid != newuid) {
308 		zoneid_t zoneid = crgetzoneid(newcr);
309 
310 		mutex_enter(&pidlock);
311 		upcount_dec(olduid, zoneid);
312 		upcount_inc(newuid, zoneid);
313 		mutex_exit(&pidlock);
314 	}
315 
316 	/*
317 	 * Broadcast the new cred to all the other threads.  The old
318 	 * cred can be safely returned because we have a hold on it.
319 	 */
320 	crset(p, newcr);
321 	return (oldcr);
322 }
323 
324 static int
325 do_core(char *fp, int sig, enum core_types core_type, struct core_globals *cg)
326 {
327 	proc_t *p = curproc;
328 	cred_t *credp = CRED();
329 	rlim64_t rlimit;
330 	vnode_t *vp;
331 	int error = 0;
332 	struct execsw *eswp;
333 	cred_t *ocredp = NULL;
334 	int is_setid = 0;
335 	core_content_t content;
336 	uid_t uid;
337 	gid_t gid;
338 
339 	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE) {
340 		mutex_enter(&cg->core_lock);
341 		content = cg->core_content;
342 		mutex_exit(&cg->core_lock);
343 		rlimit = cg->core_rlimit;
344 	} else {
345 		mutex_enter(&p->p_lock);
346 		rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE],
347 		    p->p_rctls, p);
348 		content = corectl_content_value(p->p_content);
349 		mutex_exit(&p->p_lock);
350 	}
351 
352 	if (rlimit == 0)
353 		return (EFBIG);
354 
355 	/*
356 	 * If SNOCD is set, or if the effective, real, and saved ids do
357 	 * not match up, no one but a privileged user is allowed to view
358 	 * this core file.  Set the credentials and the owner to root.
359 	 */
360 	if ((p->p_flag & SNOCD) ||
361 	    (uid = crgetuid(credp)) != crgetruid(credp) ||
362 	    uid != crgetsuid(credp) ||
363 	    (gid = crgetgid(credp)) != crgetrgid(credp) ||
364 	    gid != crgetsgid(credp)) {
365 		/*
366 		 * Because this is insecure against certain forms of file
367 		 * system attack, do it only if set-id core files have been
368 		 * enabled via corectl(CC_GLOBAL_SETID | CC_PROCESS_SETID).
369 		 */
370 		if (((core_type == CORE_GLOBAL || core_type == CORE_ZONE) &&
371 		    !(cg->core_options & CC_GLOBAL_SETID)) ||
372 		    (core_type == CORE_PROC &&
373 		    !(cg->core_options & CC_PROCESS_SETID)))
374 			return (ENOTSUP);
375 
376 		is_setid = 1;
377 	}
378 
379 	/*
380 	 * If we are doing a "global" core dump or a set-id core dump,
381 	 * use kcred to do the dumping.
382 	 */
383 	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE || is_setid) {
384 		/*
385 		 * Use the zone's "kcred" to prevent privilege
386 		 * escalation.
387 		 */
388 		credp = zone_get_kcred(getzoneid());
389 		ASSERT(credp != NULL);
390 		ocredp = set_cred(p, credp);
391 	}
392 
393 	/*
394 	 * First remove any existing core file, then
395 	 * open the new core file with (O_EXCL|O_CREAT).
396 	 *
397 	 * The reasons for doing this are manifold:
398 	 *
399 	 * For security reasons, we don't want root processes
400 	 * to dump core through a symlink because that would
401 	 * allow a malicious user to clobber any file on
402 	 * the system if s/he could convince a root process,
403 	 * perhaps a set-uid root process that s/he started,
404 	 * to dump core in a directory writable by that user.
405 	 * Similar security reasons apply to hard links.
406 	 * For symmetry we do this unconditionally, not
407 	 * just for root processes.
408 	 *
409 	 * If the process has the core file mmap()d into the
410 	 * address space, we would be modifying the address
411 	 * space that we are trying to dump if we did not first
412 	 * remove the core file.  (The command "file core"
413 	 * is the canonical example of this possibility.)
414 	 *
415 	 * Opening the core file with O_EXCL|O_CREAT ensures than
416 	 * two concurrent core dumps don't clobber each other.
417 	 * One is bound to lose; we don't want to make both lose.
418 	 */
419 	if ((error = remove_core_file(fp, core_type)) == 0) {
420 		error = create_core_file(fp, core_type, &vp);
421 	}
422 
423 	/*
424 	 * Now that vn_open is complete, reset the process's credentials if
425 	 * we changed them, and make 'credp' point to kcred used
426 	 * above.  We use 'credp' to do i/o on the core file below, but leave
427 	 * p->p_cred set to the original credential to allow the core file
428 	 * to record this information.
429 	 */
430 	if (ocredp != NULL)
431 		credp = set_cred(p, ocredp);
432 
433 	if (error == 0) {
434 		int closerr;
435 #if defined(__sparc)
436 		(void) flush_user_windows_to_stack(NULL);
437 #endif
438 		if ((eswp = PTOU(curproc)->u_execsw) == NULL ||
439 		    (eswp = findexec_by_magic(eswp->exec_magic)) == NULL) {
440 			error = ENOSYS;
441 		} else {
442 			error = eswp->exec_core(vp, p, credp, rlimit, sig,
443 			    content);
444 			rw_exit(eswp->exec_lock);
445 		}
446 
447 		closerr = VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, credp, NULL);
448 		VN_RELE(vp);
449 		if (error == 0)
450 			error = closerr;
451 	}
452 
453 	if (ocredp != NULL)
454 		crfree(credp);
455 
456 	return (error);
457 }
458 
459 /*
460  * Convert a core name pattern to a pathname.
461  */
462 static int
463 expand_string(const char *pat, char *fp, int size, cred_t *cr)
464 {
465 	proc_t *p = curproc;
466 	char buf[24];
467 	int len, i;
468 	char *s;
469 	char c;
470 
471 	while ((c = *pat++) != '\0') {
472 		if (size < 2)
473 			return (ENAMETOOLONG);
474 		if (c != '%') {
475 			size--;
476 			*fp++ = c;
477 			continue;
478 		}
479 		if ((c = *pat++) == '\0') {
480 			size--;
481 			*fp++ = '%';
482 			break;
483 		}
484 		switch (c) {
485 		case 'p':	/* pid */
486 			(void) sprintf((s = buf), "%d", p->p_pid);
487 			break;
488 		case 'u':	/* effective uid */
489 			(void) sprintf((s = buf), "%u", crgetuid(p->p_cred));
490 			break;
491 		case 'g':	/* effective gid */
492 			(void) sprintf((s = buf), "%u", crgetgid(p->p_cred));
493 			break;
494 		case 'f':	/* exec'd filename */
495 			s = PTOU(p)->u_comm;
496 			break;
497 		case 'd':	/* exec'd dirname */
498 			/*
499 			 * Even if pathname caching is disabled, we should
500 			 * be able to lookup the pathname for a directory.
501 			 */
502 			if (p->p_execdir != NULL && vnodetopath(NULL,
503 			    p->p_execdir, fp, size, cr) == 0) {
504 				len = (int)strlen(fp);
505 				ASSERT(len < size);
506 				ASSERT(len >= 1);
507 				ASSERT(fp[0] == '/');
508 
509 				/*
510 				 * Strip off the leading slash.
511 				 */
512 				for (i = 0; i < len; i++) {
513 					fp[i] = fp[i + 1];
514 				}
515 
516 				len--;
517 
518 				size -= len;
519 				fp += len;
520 			} else {
521 				*fp = '\0';
522 			}
523 
524 			continue;
525 		case 'n':	/* system nodename */
526 			s = uts_nodename();
527 			break;
528 		case 'm':	/* machine (sun4u, etc) */
529 			s = utsname.machine;
530 			break;
531 		case 't':	/* decimal value of time(2) */
532 			(void) sprintf((s = buf), "%ld", gethrestime_sec());
533 			break;
534 		case 'z':
535 			s = p->p_zone->zone_name;
536 			break;
537 		case '%':
538 			(void) strcpy((s = buf), "%");
539 			break;
540 		default:
541 			s = buf;
542 			buf[0] = '%';
543 			buf[1] = c;
544 			buf[2] = '\0';
545 			break;
546 		}
547 		len = (int)strlen(s);
548 		if ((size -= len) <= 0)
549 			return (ENAMETOOLONG);
550 		(void) strcpy(fp, s);
551 		fp += len;
552 	}
553 
554 	*fp = '\0';
555 	return (0);
556 }
557 
558 static int
559 dump_one_core(int sig, rlim64_t rlimit, enum core_types core_type,
560     struct core_globals *cg, char **name)
561 {
562 	refstr_t *rp;
563 	proc_t *p = curproc;
564 	zoneid_t zoneid;
565 	int error;
566 	char *fp;
567 	cred_t *cr;
568 
569 	ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
570 	zoneid = (core_type == CORE_ZONE ? getzoneid() : GLOBAL_ZONEID);
571 
572 	mutex_enter(&cg->core_lock);
573 	if ((rp = cg->core_file) != NULL)
574 		refstr_hold(rp);
575 	mutex_exit(&cg->core_lock);
576 	if (rp == NULL) {
577 		core_log(cg, 0, "no global core file pattern exists", NULL,
578 		    zoneid);
579 		return (1);	/* core file not generated */
580 	}
581 	fp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
582 	cr = zone_get_kcred(getzoneid());
583 	error = expand_string(refstr_value(rp), fp, MAXPATHLEN, cr);
584 	crfree(cr);
585 	if (error != 0) {
586 		core_log(cg, 0, "global core file pattern too long",
587 		    refstr_value(rp), zoneid);
588 	} else if ((error = do_core(fp, sig, core_type, cg)) == 0) {
589 		core_log(cg, 0, "core dumped", fp, zoneid);
590 	} else if (error == ENOTSUP) {
591 		core_log(cg, 0, "setid process, core not dumped", fp, zoneid);
592 	} else if (error == ENOSPC) {
593 		core_log(cg, 0, "no space left on device, core truncated",
594 		    fp, zoneid);
595 	} else if (error == EFBIG) {
596 		if (rlimit == 0)
597 			core_log(cg, 0, "core rlimit is zero, core not dumped",
598 			    fp, zoneid);
599 		else
600 			core_log(cg, 0, "core rlimit exceeded, core truncated",
601 			    fp, zoneid);
602 		/*
603 		 * In addition to the core result logging, we
604 		 * may also have explicit actions defined on
605 		 * core file size violations via the resource
606 		 * control framework.
607 		 */
608 		mutex_enter(&p->p_lock);
609 		(void) rctl_action(rctlproc_legacy[RLIMIT_CORE],
610 		    p->p_rctls, p, RCA_SAFE);
611 		mutex_exit(&p->p_lock);
612 	} else {
613 		core_log(cg, error, "core dump failed", fp, zoneid);
614 	}
615 	refstr_rele(rp);
616 	if (name != NULL)
617 		*name = fp;
618 	else
619 		kmem_free(fp, MAXPATHLEN);
620 	return (error);
621 }
622 
623 int
624 core(int sig, int ext)
625 {
626 	proc_t *p = curproc;
627 	klwp_t *lwp = ttolwp(curthread);
628 	refstr_t *rp;
629 	char *fp_process = NULL, *fp_global = NULL, *fp_zone = NULL;
630 	int error1 = 1;
631 	int error2 = 1;
632 	int error3 = 1;
633 	k_sigset_t sigmask;
634 	k_sigset_t sighold;
635 	rlim64_t rlimit;
636 	struct core_globals *my_cg, *global_cg;
637 
638 	global_cg = zone_getspecific(core_zone_key, global_zone);
639 	ASSERT(global_cg != NULL);
640 
641 	my_cg = zone_getspecific(core_zone_key, curproc->p_zone);
642 	ASSERT(my_cg != NULL);
643 
644 	/* core files suppressed? */
645 	if (!(my_cg->core_options & (CC_PROCESS_PATH|CC_GLOBAL_PATH)) &&
646 	    !(global_cg->core_options & CC_GLOBAL_PATH)) {
647 		if (!ext && p->p_ct_process != NULL)
648 			contract_process_core(p->p_ct_process, p, sig,
649 			    NULL, NULL, NULL);
650 		return (1);
651 	}
652 
653 	/*
654 	 * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM.
655 	 * These signals are allowed to interrupt the core dump.
656 	 * SIGQUIT is not allowed because it is supposed to make a core.
657 	 * Additionally, get current limit on core file size for handling later
658 	 * error reporting.
659 	 */
660 	mutex_enter(&p->p_lock);
661 
662 	p->p_flag |= SDOCORE;
663 	schedctl_finish_sigblock(curthread);
664 	sigmask = curthread->t_hold;	/* remember for later */
665 	sigfillset(&sighold);
666 	if (!sigismember(&sigmask, SIGHUP))
667 		sigdelset(&sighold, SIGHUP);
668 	if (!sigismember(&sigmask, SIGINT))
669 		sigdelset(&sighold, SIGINT);
670 	if (!sigismember(&sigmask, SIGKILL))
671 		sigdelset(&sighold, SIGKILL);
672 	if (!sigismember(&sigmask, SIGTERM))
673 		sigdelset(&sighold, SIGTERM);
674 	curthread->t_hold = sighold;
675 
676 	rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE], p->p_rctls,
677 	    p);
678 
679 	mutex_exit(&p->p_lock);
680 
681 	/*
682 	 * Undo any watchpoints.
683 	 */
684 	pr_free_watched_pages(p);
685 
686 	/*
687 	 * The presence of a current signal prevents file i/o
688 	 * from succeeding over a network.  We copy the current
689 	 * signal information to the side and cancel the current
690 	 * signal so that the core dump will succeed.
691 	 */
692 	ASSERT(lwp->lwp_cursig == sig);
693 	lwp->lwp_cursig = 0;
694 	lwp->lwp_extsig = 0;
695 	if (lwp->lwp_curinfo == NULL) {
696 		bzero(&lwp->lwp_siginfo, sizeof (k_siginfo_t));
697 		lwp->lwp_siginfo.si_signo = sig;
698 		lwp->lwp_siginfo.si_code = SI_NOINFO;
699 	} else {
700 		bcopy(&lwp->lwp_curinfo->sq_info,
701 		    &lwp->lwp_siginfo, sizeof (k_siginfo_t));
702 		siginfofree(lwp->lwp_curinfo);
703 		lwp->lwp_curinfo = NULL;
704 	}
705 
706 	/*
707 	 * Convert the core file name patterns into path names
708 	 * and call do_core() to write the core files.
709 	 */
710 
711 	if (my_cg->core_options & CC_PROCESS_PATH) {
712 		mutex_enter(&p->p_lock);
713 		if (p->p_corefile != NULL)
714 			rp = corectl_path_value(p->p_corefile);
715 		else
716 			rp = NULL;
717 		mutex_exit(&p->p_lock);
718 		if (rp != NULL) {
719 			fp_process = kmem_alloc(MAXPATHLEN, KM_SLEEP);
720 			error1 = expand_string(refstr_value(rp),
721 			    fp_process, MAXPATHLEN, p->p_cred);
722 			if (error1 == 0)
723 				error1 = do_core(fp_process, sig, CORE_PROC,
724 				    my_cg);
725 			refstr_rele(rp);
726 		}
727 	}
728 
729 	if (my_cg->core_options & CC_GLOBAL_PATH)
730 		error2 = dump_one_core(sig, rlimit, CORE_ZONE, my_cg,
731 		    &fp_global);
732 	if (global_cg != my_cg && (global_cg->core_options & CC_GLOBAL_PATH))
733 		error3 = dump_one_core(sig, rlimit, CORE_GLOBAL, global_cg,
734 		    &fp_zone);
735 
736 	/*
737 	 * Restore the signal hold mask.
738 	 */
739 	mutex_enter(&p->p_lock);
740 	curthread->t_hold = sigmask;
741 	mutex_exit(&p->p_lock);
742 
743 	if (!ext && p->p_ct_process != NULL)
744 		contract_process_core(p->p_ct_process, p, sig,
745 		    error1 == 0 ? fp_process : NULL,
746 		    error2 == 0 ? fp_global : NULL,
747 		    error3 == 0 ? fp_zone : NULL);
748 
749 	if (fp_process != NULL)
750 		kmem_free(fp_process, MAXPATHLEN);
751 	if (fp_global != NULL)
752 		kmem_free(fp_global, MAXPATHLEN);
753 	if (fp_zone != NULL)
754 		kmem_free(fp_zone, MAXPATHLEN);
755 
756 	/*
757 	 * Return non-zero if no core file was created.
758 	 */
759 	return (error1 != 0 && error2 != 0 && error3 != 0);
760 }
761 
762 /*
763  * Maximum chunk size for dumping core files,
764  * size in pages, patchable in /etc/system
765  */
766 uint_t	core_chunk = 32;
767 
768 /*
769  * The delay between core_write() calls, in microseconds.  The default
770  * matches one "normal" clock tick, or 10 milliseconds.
771  */
772 clock_t	core_delay_usec = 10000;
773 
774 /*
775  * Common code to core dump process memory.  The core_seg routine does i/o
776  * using core_write() below, and so it has the same failure semantics.
777  */
778 int
779 core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size,
780     rlim64_t rlimit, cred_t *credp)
781 {
782 	caddr_t eaddr;
783 	caddr_t base;
784 	size_t len;
785 	int err = 0;
786 
787 	eaddr = addr + size;
788 	for (base = addr; base < eaddr; base += len) {
789 		len = eaddr - base;
790 		if (as_memory(p->p_as, &base, &len) != 0)
791 			return (0);
792 		/*
793 		 * Reduce len to a reasonable value so that we don't
794 		 * overwhelm the VM system with a monstrously large
795 		 * single write and cause pageout to stop running.
796 		 */
797 		if (len > (size_t)core_chunk * PAGESIZE)
798 			len = (size_t)core_chunk * PAGESIZE;
799 
800 		err = core_write(vp, UIO_USERSPACE,
801 		    offset + (size_t)(base - addr), base, len, rlimit, credp);
802 
803 		if (err == 0) {
804 			/*
805 			 * Give pageout a chance to run.
806 			 * Also allow core dumping to be interruptible.
807 			 */
808 			err = delay_sig(drv_usectohz(core_delay_usec));
809 		}
810 		if (err)
811 			return (err);
812 	}
813 	return (0);
814 }
815 
816 /*
817  * Wrapper around vn_rdwr to perform writes to a core file.  For core files,
818  * we always want to write as much as we possibly can, and then make sure to
819  * return either 0 to the caller (for success), or the actual errno value.
820  * By using this function, the caller can omit additional code for handling
821  * retries and errors for partial writes returned by vn_rdwr.  If vn_rdwr
822  * unexpectedly returns zero but no progress has been made, we return ENOSPC.
823  */
824 int
825 core_write(vnode_t *vp, enum uio_seg segflg, offset_t offset,
826     const void *buf, size_t len, rlim64_t rlimit, cred_t *credp)
827 {
828 	ssize_t resid = len;
829 	int error = 0;
830 
831 	while (len != 0) {
832 		error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, len, offset,
833 		    segflg, 0, rlimit, credp, &resid);
834 
835 		if (error != 0)
836 			break;
837 
838 		if (resid >= len)
839 			return (ENOSPC);
840 
841 		buf = (const char *)buf + len - resid;
842 		offset += len - resid;
843 		len = resid;
844 	}
845 
846 	return (error);
847 }
848