1 /*
2 * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause
3 *
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 * - kern_sig.c
36 */
37 /*
38 * Copyright (c) 1993, David Greenman
39 * All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 * -kern_exec.c
62 */
63
64 #include <sys/systm.h>
65 #include <sys/acct.h>
66 #include <sys/compressor.h>
67 #include <sys/devctl.h>
68 #include <sys/fcntl.h>
69 #include <sys/jail.h>
70 #include <sys/limits.h>
71 #include <sys/namei.h>
72 #include <sys/proc.h>
73 #include <sys/sbuf.h>
74 #include <sys/stat.h>
75 #include <sys/sysctl.h>
76 #include <sys/sysent.h>
77 #include <sys/syslog.h>
78 #include <sys/ucoredump.h>
79 #include <sys/unistd.h>
80 #include <sys/vnode.h>
81
82 #include <security/audit/audit.h>
83
84 #define GZIP_SUFFIX ".gz"
85 #define ZSTD_SUFFIX ".zst"
86
87 #define MAX_NUM_CORE_FILES 100000
88 #ifndef NUM_CORE_FILES
89 #define NUM_CORE_FILES 5
90 #endif
91
92 static coredumper_handle_fn coredump_vnode;
93 static struct coredumper vnode_coredumper = {
94 .cd_name = "vnode_coredumper",
95 .cd_handle = coredump_vnode,
96 };
97
98 SYSINIT(vnode_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY,
99 coredumper_register, &vnode_coredumper);
100
101 _Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES,
102 "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")");
103 static int num_cores = NUM_CORE_FILES;
104
105 static int capmode_coredump;
106 SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN,
107 &capmode_coredump, 0, "Allow processes in capability mode to dump core");
108
109 static int set_core_nodump_flag = 0;
110 SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
111 0, "Enable setting the NODUMP flag on coredump files");
112
113 static int coredump_devctl = 0;
114 SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl,
115 0, "Generate a devctl notification when processes coredump");
116
117 /*
118 * corefilename[] is protected by the allproc_lock.
119 */
120 static char corefilename[MAXPATHLEN] = { "%N.core" };
121 TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
122
123 static int
sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)124 sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)
125 {
126 int error;
127
128 sx_xlock(&allproc_lock);
129 error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename),
130 req);
131 sx_xunlock(&allproc_lock);
132
133 return (error);
134 }
135 SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW |
136 CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A",
137 "Process corefile name format string");
138
139 static int
sysctl_debug_num_cores_check(SYSCTL_HANDLER_ARGS)140 sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS)
141 {
142 int error;
143 int new_val;
144
145 new_val = num_cores;
146 error = sysctl_handle_int(oidp, &new_val, 0, req);
147 if (error != 0 || req->newptr == NULL)
148 return (error);
149 if (new_val > MAX_NUM_CORE_FILES)
150 new_val = MAX_NUM_CORE_FILES;
151 if (new_val < 0)
152 new_val = 0;
153 num_cores = new_val;
154 return (0);
155 }
156 SYSCTL_PROC(_debug, OID_AUTO, ncores,
157 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int),
158 sysctl_debug_num_cores_check, "I",
159 "Maximum number of generated process corefiles while using index format");
160
161 static void
vnode_close_locked(struct thread * td,struct vnode * vp)162 vnode_close_locked(struct thread *td, struct vnode *vp)
163 {
164
165 VOP_UNLOCK(vp);
166 vn_close(vp, FWRITE, td->td_ucred, td);
167 }
168
169 int
core_vn_write(const struct coredump_writer * cdw,const void * base,size_t len,off_t offset,enum uio_seg seg,struct ucred * cred,size_t * resid,struct thread * td)170 core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len,
171 off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid,
172 struct thread *td)
173 {
174 struct coredump_vnode_ctx *ctx = cdw->ctx;
175
176 return (vn_rdwr_inchunks(UIO_WRITE, ctx->vp, __DECONST(void *, base),
177 len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
178 cred, ctx->fcred, resid, td));
179 }
180
181 int
core_vn_extend(const struct coredump_writer * cdw,off_t newsz,struct ucred * cred)182 core_vn_extend(const struct coredump_writer *cdw, off_t newsz,
183 struct ucred *cred)
184 {
185 struct coredump_vnode_ctx *ctx = cdw->ctx;
186 struct mount *mp;
187 int error;
188
189 error = vn_start_write(ctx->vp, &mp, V_WAIT);
190 if (error != 0)
191 return (error);
192 vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY);
193 error = vn_truncate_locked(ctx->vp, newsz, false, cred);
194 VOP_UNLOCK(ctx->vp);
195 vn_finished_write(mp);
196 return (error);
197 }
198
199 /*
200 * If the core format has a %I in it, then we need to check
201 * for existing corefiles before defining a name.
202 * To do this we iterate over 0..ncores to find a
203 * non-existing core file name to use. If all core files are
204 * already used we choose the oldest one.
205 */
206 static int
corefile_open_last(struct thread * td,char * name,int indexpos,int indexlen,int ncores,struct vnode ** vpp)207 corefile_open_last(struct thread *td, char *name, int indexpos,
208 int indexlen, int ncores, struct vnode **vpp)
209 {
210 struct vnode *oldvp, *nextvp, *vp;
211 struct vattr vattr;
212 struct nameidata nd;
213 int error, i, flags, oflags, cmode;
214 char ch;
215 struct timespec lasttime;
216
217 nextvp = oldvp = NULL;
218 cmode = S_IRUSR | S_IWUSR;
219 oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
220 (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
221
222 for (i = 0; i < ncores; i++) {
223 flags = O_CREAT | FWRITE | O_NOFOLLOW;
224
225 ch = name[indexpos + indexlen];
226 (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen,
227 i);
228 name[indexpos + indexlen] = ch;
229
230 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
231 error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
232 NULL);
233 if (error != 0)
234 break;
235
236 vp = nd.ni_vp;
237 NDFREE_PNBUF(&nd);
238 if ((flags & O_CREAT) == O_CREAT) {
239 nextvp = vp;
240 break;
241 }
242
243 error = VOP_GETATTR(vp, &vattr, td->td_ucred);
244 if (error != 0) {
245 vnode_close_locked(td, vp);
246 break;
247 }
248
249 if (oldvp == NULL ||
250 lasttime.tv_sec > vattr.va_mtime.tv_sec ||
251 (lasttime.tv_sec == vattr.va_mtime.tv_sec &&
252 lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) {
253 if (oldvp != NULL)
254 vn_close(oldvp, FWRITE, td->td_ucred, td);
255 oldvp = vp;
256 VOP_UNLOCK(oldvp);
257 lasttime = vattr.va_mtime;
258 } else {
259 vnode_close_locked(td, vp);
260 }
261 }
262
263 if (oldvp != NULL) {
264 if (nextvp == NULL) {
265 if ((td->td_proc->p_flag & P_SUGID) != 0) {
266 error = EFAULT;
267 vn_close(oldvp, FWRITE, td->td_ucred, td);
268 } else {
269 nextvp = oldvp;
270 error = vn_lock(nextvp, LK_EXCLUSIVE);
271 if (error != 0) {
272 vn_close(nextvp, FWRITE, td->td_ucred,
273 td);
274 nextvp = NULL;
275 }
276 }
277 } else {
278 vn_close(oldvp, FWRITE, td->td_ucred, td);
279 }
280 }
281 if (error != 0) {
282 if (nextvp != NULL)
283 vnode_close_locked(td, oldvp);
284 } else {
285 *vpp = nextvp;
286 }
287
288 return (error);
289 }
290
291 /*
292 * corefile_open(comm, uid, pid, td, compress, vpp, namep)
293 * Expand the name described in corefilename, using name, uid, and pid
294 * and open/create core file.
295 * corefilename is a printf-like string, with three format specifiers:
296 * %N name of process ("name")
297 * %P process id (pid)
298 * %U user id (uid)
299 * For example, "%N.core" is the default; they can be disabled completely
300 * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
301 * This is controlled by the sysctl variable kern.corefile (see above).
302 */
303 static int
corefile_open(const char * comm,uid_t uid,pid_t pid,struct thread * td,int compress,int signum,struct vnode ** vpp,char ** namep)304 corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
305 int compress, int signum, struct vnode **vpp, char **namep)
306 {
307 struct sbuf sb;
308 struct nameidata nd;
309 const char *format;
310 char *hostname, *name;
311 int cmode, error, flags, i, indexpos, indexlen, oflags, ncores;
312
313 hostname = NULL;
314 format = corefilename;
315 name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
316 indexlen = 0;
317 indexpos = -1;
318 ncores = num_cores;
319 (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
320 sx_slock(&allproc_lock);
321 for (i = 0; format[i] != '\0'; i++) {
322 switch (format[i]) {
323 case '%': /* Format character */
324 i++;
325 switch (format[i]) {
326 case '%':
327 sbuf_putc(&sb, '%');
328 break;
329 case 'H': /* hostname */
330 if (hostname == NULL) {
331 hostname = malloc(MAXHOSTNAMELEN,
332 M_TEMP, M_WAITOK);
333 }
334 getcredhostname(td->td_ucred, hostname,
335 MAXHOSTNAMELEN);
336 sbuf_cat(&sb, hostname);
337 break;
338 case 'I': /* autoincrementing index */
339 if (indexpos != -1) {
340 sbuf_printf(&sb, "%%I");
341 break;
342 }
343
344 indexpos = sbuf_len(&sb);
345 sbuf_printf(&sb, "%u", ncores - 1);
346 indexlen = sbuf_len(&sb) - indexpos;
347 break;
348 case 'N': /* process name */
349 sbuf_printf(&sb, "%s", comm);
350 break;
351 case 'P': /* process id */
352 sbuf_printf(&sb, "%u", pid);
353 break;
354 case 'S': /* signal number */
355 sbuf_printf(&sb, "%i", signum);
356 break;
357 case 'U': /* user id */
358 sbuf_printf(&sb, "%u", uid);
359 break;
360 default:
361 log(LOG_ERR,
362 "Unknown format character %c in "
363 "corename `%s'\n", format[i], format);
364 break;
365 }
366 break;
367 default:
368 sbuf_putc(&sb, format[i]);
369 break;
370 }
371 }
372 sx_sunlock(&allproc_lock);
373 free(hostname, M_TEMP);
374 if (compress == COMPRESS_GZIP)
375 sbuf_cat(&sb, GZIP_SUFFIX);
376 else if (compress == COMPRESS_ZSTD)
377 sbuf_cat(&sb, ZSTD_SUFFIX);
378 if (sbuf_error(&sb) != 0) {
379 log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
380 "long\n", (long)pid, comm, (u_long)uid);
381 sbuf_delete(&sb);
382 free(name, M_TEMP);
383 return (ENOMEM);
384 }
385 sbuf_finish(&sb);
386 sbuf_delete(&sb);
387
388 if (indexpos != -1) {
389 error = corefile_open_last(td, name, indexpos, indexlen, ncores,
390 vpp);
391 if (error != 0) {
392 log(LOG_ERR,
393 "pid %d (%s), uid (%u): Path `%s' failed "
394 "on initial open test, error = %d\n",
395 pid, comm, uid, name, error);
396 }
397 } else {
398 cmode = S_IRUSR | S_IWUSR;
399 oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
400 (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
401 flags = O_CREAT | FWRITE | O_NOFOLLOW;
402 if ((td->td_proc->p_flag & P_SUGID) != 0)
403 flags |= O_EXCL;
404
405 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
406 error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
407 NULL);
408 if (error == 0) {
409 *vpp = nd.ni_vp;
410 NDFREE_PNBUF(&nd);
411 }
412 }
413
414 if (error != 0) {
415 #ifdef AUDIT
416 audit_proc_coredump(td, name, error);
417 #endif
418 free(name, M_TEMP);
419 return (error);
420 }
421 *namep = name;
422 return (0);
423 }
424
425 /*
426 * The vnode dumper is the traditional coredump handler. Our policy and limits
427 * are generally checked already, so it creates the coredump name and passes on
428 * a vnode and a size limit to the process-specific coredump routine if there is
429 * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the
430 * error from the process-specific routine.
431 */
432 static int
coredump_vnode(struct thread * td,off_t limit)433 coredump_vnode(struct thread *td, off_t limit)
434 {
435 struct proc *p = td->td_proc;
436 struct ucred *cred = td->td_ucred;
437 struct vnode *vp;
438 struct coredump_vnode_ctx wctx;
439 struct coredump_writer cdw = { };
440 struct flock lf;
441 struct vattr vattr;
442 size_t fullpathsize;
443 int error, error1, jid, locked, ppid, sig;
444 char *name; /* name of corefile */
445 void *rl_cookie;
446 char *fullpath, *freepath = NULL;
447 struct sbuf *sb;
448
449 PROC_LOCK_ASSERT(p, MA_OWNED);
450
451 ppid = p->p_oppid;
452 sig = p->p_sig;
453 jid = p->p_ucred->cr_prison->pr_id;
454 PROC_UNLOCK(p);
455
456 error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
457 compress_user_cores, sig, &vp, &name);
458 if (error != 0)
459 return (error);
460
461 /*
462 * Don't dump to non-regular files or files with links.
463 * Do not dump into system files. Effective user must own the corefile.
464 */
465 if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
466 vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 ||
467 vattr.va_uid != cred->cr_uid) {
468 VOP_UNLOCK(vp);
469 error = EFAULT;
470 goto out;
471 }
472
473 VOP_UNLOCK(vp);
474
475 /* Postpone other writers, including core dumps of other processes. */
476 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
477
478 lf.l_whence = SEEK_SET;
479 lf.l_start = 0;
480 lf.l_len = 0;
481 lf.l_type = F_WRLCK;
482 locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
483
484 VATTR_NULL(&vattr);
485 vattr.va_size = 0;
486 if (set_core_nodump_flag)
487 vattr.va_flags = UF_NODUMP;
488 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
489 VOP_SETATTR(vp, &vattr, cred);
490 VOP_UNLOCK(vp);
491 PROC_LOCK(p);
492 p->p_acflag |= ACORE;
493 PROC_UNLOCK(p);
494
495 wctx.vp = vp;
496 wctx.fcred = NOCRED;
497
498 cdw.ctx = &wctx;
499 cdw.write_fn = core_vn_write;
500 cdw.extend_fn = core_vn_extend;
501
502 if (p->p_sysent->sv_coredump != NULL) {
503 error = p->p_sysent->sv_coredump(td, &cdw, limit, 0);
504 } else {
505 error = ENOSYS;
506 }
507
508 if (locked) {
509 lf.l_type = F_UNLCK;
510 VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
511 }
512 vn_rangelock_unlock(vp, rl_cookie);
513
514 /*
515 * Notify the userland helper that a process triggered a core dump.
516 * This allows the helper to run an automated debugging session.
517 */
518 if (error != 0 || coredump_devctl == 0)
519 goto out;
520 sb = sbuf_new_auto();
521 if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0)
522 goto out2;
523 sbuf_cat(sb, "comm=\"");
524 devctl_safe_quote_sb(sb, fullpath);
525 free(freepath, M_TEMP);
526 sbuf_cat(sb, "\" core=\"");
527
528 /*
529 * We can't lookup core file vp directly. When we're replacing a core, and
530 * other random times, we flush the name cache, so it will fail. Instead,
531 * if the path of the core is relative, add the current dir in front if it.
532 */
533 if (name[0] != '/') {
534 fullpathsize = MAXPATHLEN;
535 freepath = malloc(fullpathsize, M_TEMP, M_WAITOK);
536 if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) {
537 free(freepath, M_TEMP);
538 goto out2;
539 }
540 devctl_safe_quote_sb(sb, fullpath);
541 free(freepath, M_TEMP);
542 sbuf_putc(sb, '/');
543 }
544 devctl_safe_quote_sb(sb, name);
545 sbuf_putc(sb, '"');
546
547 sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d",
548 jid, p->p_pid, ppid, sig);
549 if (sbuf_finish(sb) == 0)
550 devctl_notify("kernel", "signal", "coredump", sbuf_data(sb));
551 out2:
552 sbuf_delete(sb);
553 out:
554 error1 = vn_close(vp, FWRITE, cred, td);
555 if (error == 0)
556 error = error1;
557 #ifdef AUDIT
558 audit_proc_coredump(td, name, error);
559 #endif
560 free(name, M_TEMP);
561 return (error);
562 }
563