1 /*- 2 * Copyright (c) 2015 John Baldwin <jhb@FreeBSD.org> 3 * Copyright (c) 2023 The FreeBSD Foundation 4 * 5 * This software was developed by Jake Freeland <jfree@FreeBSD.org> 6 * under sponsorship from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/capsicum.h> 32 #include <sys/cpuset.h> 33 #include <sys/ktrace.h> 34 #include <sys/socket.h> 35 #include <sys/sysent.h> 36 #include <sys/time.h> 37 #include <sys/uio.h> 38 #include <sys/user.h> 39 #include <sys/wait.h> 40 41 #include <machine/sysarch.h> 42 #include <netinet/in.h> 43 44 #include <atf-c.h> 45 #include <capsicum_helpers.h> 46 #include <errno.h> 47 #include <fcntl.h> 48 #include <netdb.h> 49 #include <signal.h> 50 #include <sysdecode.h> 51 52 /* 53 * A variant of ATF_REQUIRE that is suitable for use in child 54 * processes. This only works if the parent process is tripped up by 55 * the early exit and fails some requirement itself. 56 */ 57 #define CHILD_REQUIRE(exp) do { \ 58 if (!(exp)) \ 59 child_fail_require(__FILE__, __LINE__, \ 60 #exp " not met\n"); \ 61 } while (0) 62 #define CHILD_REQUIRE_EQ(actual, expected) do { \ 63 __typeof__(expected) _e = expected; \ 64 __typeof__(actual) _a = actual; \ 65 if (_e != _a) \ 66 child_fail_require(__FILE__, __LINE__, #actual \ 67 " (%jd) == " #expected " (%jd) not met\n", \ 68 (intmax_t)_a, (intmax_t)_e); \ 69 } while (0) 70 71 static __dead2 void 72 child_fail_require(const char *file, int line, const char *fmt, ...) 73 { 74 va_list ap; 75 char buf[1024]; 76 77 /* Use write() not fprintf() to avoid possible duplicate output. */ 78 snprintf(buf, sizeof(buf), "%s:%d: ", file, line); 79 write(STDERR_FILENO, buf, strlen(buf)); 80 va_start(ap, fmt); 81 vsnprintf(buf, sizeof(buf), fmt, ap); 82 write(STDERR_FILENO, buf, strlen(buf)); 83 va_end(ap); 84 85 _exit(32); 86 } 87 88 /* 89 * Determine sysdecode ABI based on proc's ABI in sv_flags. 90 */ 91 static enum sysdecode_abi 92 syscallabi(u_int sv_flags) 93 { 94 switch (sv_flags & SV_ABI_MASK) { 95 case SV_ABI_FREEBSD: 96 return (SYSDECODE_ABI_FREEBSD); 97 case SV_ABI_LINUX: 98 #ifdef __LP64__ 99 if ((sv_flags & SV_ILP32) != 0) 100 return (SYSDECODE_ABI_LINUX32); 101 #endif 102 return (SYSDECODE_ABI_LINUX); 103 } 104 return (SYSDECODE_ABI_UNKNOWN); 105 } 106 107 /* 108 * Start tracing capability violations and notify child that it can execute. 109 * Return @numv capability violations from child in @v. 110 */ 111 static void 112 cap_trace_child(int cpid, struct ktr_cap_fail *v, int numv) 113 { 114 struct ktr_header header; 115 int error, fd, i; 116 117 ATF_REQUIRE((fd = open("ktrace.out", 118 O_RDONLY | O_CREAT | O_TRUNC)) != -1); 119 ATF_REQUIRE(ktrace("ktrace.out", KTROP_SET, 120 KTRFAC_CAPFAIL, cpid) != -1); 121 /* Notify child that we've starting tracing. */ 122 ATF_REQUIRE(kill(cpid, SIGUSR1) != -1); 123 /* Wait for child to raise violation and exit. */ 124 ATF_REQUIRE(waitpid(cpid, &error, 0) != -1); 125 ATF_REQUIRE(WIFEXITED(error)); 126 ATF_REQUIRE_EQ(WEXITSTATUS(error), 0); 127 /* Read ktrace header and ensure violation occurred. */ 128 for (i = 0; i < numv; ++i) { 129 ATF_REQUIRE((error = read(fd, &header, sizeof(header))) != -1); 130 ATF_REQUIRE_EQ(error, sizeof(header)); 131 ATF_REQUIRE_EQ(header.ktr_len, sizeof(*v)); 132 ATF_REQUIRE_EQ(header.ktr_pid, cpid); 133 /* Read the capability violation. */ 134 ATF_REQUIRE((error = read(fd, v + i, 135 sizeof(*v))) != -1); 136 ATF_REQUIRE_EQ(error, sizeof(*v)); 137 } 138 ATF_REQUIRE(close(fd) != -1); 139 } 140 141 /* 142 * Test if ktrace will record an operation that is done with 143 * insufficient rights. 144 */ 145 ATF_TC_WITHOUT_HEAD(ktrace__cap_not_capable); 146 ATF_TC_BODY(ktrace__cap_not_capable, tc) 147 { 148 struct ktr_cap_fail violation; 149 cap_rights_t rights; 150 sigset_t set = { }; 151 pid_t pid; 152 int error; 153 154 /* Block SIGUSR1 so child does not terminate. */ 155 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 156 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 157 158 ATF_REQUIRE((pid = fork()) != -1); 159 if (pid == 0) { 160 /* Limit fd rights to CAP_READ. */ 161 cap_rights_init(&rights, CAP_READ); 162 CHILD_REQUIRE(caph_rights_limit(STDIN_FILENO, &rights) != -1); 163 CHILD_REQUIRE(caph_enter() != -1); 164 /* Wait until ktrace has started. */ 165 CHILD_REQUIRE(sigwait(&set, &error) != -1); 166 CHILD_REQUIRE_EQ(error, SIGUSR1); 167 /* Write without CAP_WRITE. */ 168 CHILD_REQUIRE(write(STDIN_FILENO, &pid, sizeof(pid)) == -1); 169 CHILD_REQUIRE_EQ(errno, ENOTCAPABLE); 170 exit(0); 171 } 172 173 cap_trace_child(pid, &violation, 1); 174 ATF_REQUIRE_EQ(violation.cap_type, CAPFAIL_NOTCAPABLE); 175 ATF_REQUIRE(cap_rights_is_set(&violation.cap_data.cap_needed, 176 CAP_WRITE)); 177 } 178 179 /* 180 * Test if ktrace will record an attempt to increase rights. 181 */ 182 ATF_TC_WITHOUT_HEAD(ktrace__cap_increase_rights); 183 ATF_TC_BODY(ktrace__cap_increase_rights, tc) 184 { 185 struct ktr_cap_fail violation; 186 cap_rights_t rights; 187 sigset_t set = { }; 188 pid_t pid; 189 int error; 190 191 /* Block SIGUSR1 so child does not terminate. */ 192 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 193 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 194 195 ATF_REQUIRE((pid = fork()) != -1); 196 if (pid == 0) { 197 /* Limit fd rights to CAP_READ. */ 198 cap_rights_init(&rights, CAP_READ); 199 CHILD_REQUIRE(caph_rights_limit(STDIN_FILENO, &rights) != -1); 200 CHILD_REQUIRE(caph_enter() != -1); 201 /* Wait until ktrace has started. */ 202 CHILD_REQUIRE(sigwait(&set, &error) != -1); 203 CHILD_REQUIRE_EQ(error, SIGUSR1); 204 /* Increase fd rights to include CAP_WRITE. */ 205 cap_rights_set(&rights, CAP_WRITE); 206 CHILD_REQUIRE(caph_rights_limit(STDIN_FILENO, &rights) == -1); 207 CHILD_REQUIRE_EQ(errno, ENOTCAPABLE); 208 exit(0); 209 } 210 211 cap_trace_child(pid, &violation, 1); 212 ATF_REQUIRE_EQ(violation.cap_type, CAPFAIL_INCREASE); 213 ATF_REQUIRE(cap_rights_is_set(&violation.cap_data.cap_needed, 214 CAP_WRITE)); 215 } 216 217 /* 218 * Test if disallowed syscalls are reported as capability violations. 219 */ 220 ATF_TC_WITHOUT_HEAD(ktrace__cap_syscall); 221 ATF_TC_BODY(ktrace__cap_syscall, tc) 222 { 223 struct kinfo_file kinf; 224 struct ktr_cap_fail violation[2]; 225 sigset_t set = { }; 226 pid_t pid; 227 int error; 228 229 /* Block SIGUSR1 so child does not terminate. */ 230 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 231 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 232 233 ATF_REQUIRE((pid = fork()) != -1); 234 if (pid == 0) { 235 /* Wait until ktrace has started. */ 236 CHILD_REQUIRE(sigwait(&set, &error) != -1); 237 CHILD_REQUIRE_EQ(error, SIGUSR1); 238 /* chdir() is not permitted in capability mode. */ 239 CHILD_REQUIRE(chdir(".") != -1); 240 kinf.kf_structsize = sizeof(struct kinfo_file); 241 /* 242 * fcntl() is permitted in capability mode, 243 * but the F_KINFO cmd is not. 244 */ 245 CHILD_REQUIRE(fcntl(STDIN_FILENO, F_KINFO, &kinf) != -1); 246 exit(0); 247 } 248 249 cap_trace_child(pid, violation, 2); 250 ATF_REQUIRE_EQ(violation[0].cap_type, CAPFAIL_SYSCALL); 251 error = syscallabi(violation[0].cap_svflags); 252 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation[0].cap_code), 253 "chdir"); 254 255 ATF_REQUIRE_EQ(violation[1].cap_type, CAPFAIL_SYSCALL); 256 error = syscallabi(violation[1].cap_svflags); 257 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation[1].cap_code), 258 "fcntl"); 259 ATF_REQUIRE_EQ(violation[1].cap_data.cap_int, F_KINFO); 260 } 261 262 /* 263 * Test if sending a signal to another process is reported as 264 * a signal violation. 265 */ 266 ATF_TC_WITHOUT_HEAD(ktrace__cap_signal); 267 ATF_TC_BODY(ktrace__cap_signal, tc) 268 { 269 struct ktr_cap_fail violation; 270 sigset_t set = { }; 271 pid_t pid; 272 int error; 273 274 /* Block SIGUSR1 so child does not terminate. */ 275 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 276 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 277 278 ATF_REQUIRE((pid = fork()) != -1); 279 if (pid == 0) { 280 /* Wait until ktrace has started. */ 281 CHILD_REQUIRE(sigwait(&set, &error) != -1); 282 CHILD_REQUIRE_EQ(error, SIGUSR1); 283 /* 284 * Signals may only be sent to ourself. Sending signals 285 * to other processes is not allowed in capability mode. 286 */ 287 CHILD_REQUIRE(kill(getppid(), SIGCONT) != -1); 288 exit(0); 289 } 290 291 cap_trace_child(pid, &violation, 1); 292 ATF_REQUIRE_EQ(violation.cap_type, CAPFAIL_SIGNAL); 293 error = syscallabi(violation.cap_svflags); 294 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation.cap_code), 295 "kill"); 296 ATF_REQUIRE_EQ(violation.cap_data.cap_int, SIGCONT); 297 } 298 299 /* 300 * Test if opening a socket with a restricted protocol is reported 301 * as a protocol violation. 302 */ 303 ATF_TC_WITHOUT_HEAD(ktrace__cap_proto); 304 ATF_TC_BODY(ktrace__cap_proto, tc) 305 { 306 struct ktr_cap_fail violation; 307 sigset_t set = { }; 308 pid_t pid; 309 int error; 310 311 /* Block SIGUSR1 so child does not terminate. */ 312 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 313 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 314 315 ATF_REQUIRE((pid = fork()) != -1); 316 if (pid == 0) { 317 /* Wait until ktrace has started. */ 318 CHILD_REQUIRE(sigwait(&set, &error) != -1); 319 CHILD_REQUIRE_EQ(error, SIGUSR1); 320 /* 321 * Certain protocols may not be used in capability mode. 322 * ICMP's raw-protocol interface is not allowed. 323 */ 324 CHILD_REQUIRE(close(socket(AF_INET, SOCK_RAW, 325 IPPROTO_ICMP)) != -1); 326 exit(0); 327 } 328 329 cap_trace_child(pid, &violation, 1); 330 ATF_REQUIRE_EQ(violation.cap_type, CAPFAIL_PROTO); 331 error = syscallabi(violation.cap_svflags); 332 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation.cap_code), 333 "socket"); 334 ATF_REQUIRE_EQ(violation.cap_data.cap_int, IPPROTO_ICMP); 335 } 336 337 /* 338 * Test if sending data to an address using a socket is 339 * reported as a sockaddr violation. 340 */ 341 ATF_TC_WITHOUT_HEAD(ktrace__cap_sockaddr); 342 ATF_TC_BODY(ktrace__cap_sockaddr, tc) 343 { 344 struct sockaddr_in addr = { }, *saddr; 345 struct ktr_cap_fail violation; 346 sigset_t set = { }; 347 pid_t pid; 348 int error, sfd; 349 350 /* Block SIGUSR1 so child does not terminate. */ 351 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 352 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 353 354 CHILD_REQUIRE((sfd = socket(AF_INET, SOCK_DGRAM, 355 IPPROTO_UDP)) != -1); 356 addr.sin_family = AF_INET; 357 addr.sin_port = htons(5000); 358 addr.sin_addr.s_addr = INADDR_ANY; 359 CHILD_REQUIRE(bind(sfd, (const struct sockaddr *)&addr, 360 sizeof(addr)) != -1); 361 362 ATF_REQUIRE((pid = fork()) != -1); 363 if (pid == 0) { 364 /* Wait until ktrace has started. */ 365 CHILD_REQUIRE(sigwait(&set, &error) != -1); 366 CHILD_REQUIRE_EQ(error, SIGUSR1); 367 /* 368 * Sending data to an address is not permitted. 369 * In this case, sending data to @addr causes a 370 * violation. 371 */ 372 CHILD_REQUIRE(sendto(sfd, NULL, 0, 0, 373 (const struct sockaddr *)&addr, sizeof(addr)) != -1); 374 exit(0); 375 } 376 377 cap_trace_child(pid, &violation, 1); 378 ATF_REQUIRE_EQ(violation.cap_type, CAPFAIL_SOCKADDR); 379 error = syscallabi(violation.cap_svflags); 380 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation.cap_code), 381 "sendto"); 382 saddr = (struct sockaddr_in *)&violation.cap_data.cap_sockaddr; 383 ATF_REQUIRE_EQ(saddr->sin_family, AF_INET); 384 ATF_REQUIRE_EQ(saddr->sin_port, htons(5000)); 385 ATF_REQUIRE_EQ(saddr->sin_addr.s_addr, INADDR_ANY); 386 close(sfd); 387 } 388 389 /* 390 * Test if openat() with AT_FDCWD and absolute path are reported 391 * as namei violations. 392 */ 393 ATF_TC_WITHOUT_HEAD(ktrace__cap_namei); 394 ATF_TC_BODY(ktrace__cap_namei, tc) 395 { 396 struct ktr_cap_fail violation[2]; 397 sigset_t set = { }; 398 pid_t pid; 399 int error; 400 401 /* Block SIGUSR1 so child does not terminate. */ 402 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 403 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 404 405 ATF_REQUIRE((pid = fork()) != -1); 406 if (pid == 0) { 407 /* Wait until ktrace has started. */ 408 CHILD_REQUIRE(sigwait(&set, &error) != -1); 409 CHILD_REQUIRE_EQ(error, SIGUSR1); 410 /* 411 * The AT_FDCWD file descriptor has not been opened 412 * and will be inaccessible in capability mode. 413 */ 414 CHILD_REQUIRE(close(openat(AT_FDCWD, "ktrace.out", 415 O_RDONLY | O_CREAT)) != -1); 416 /* 417 * Absolute paths are inaccessible in capability mode. 418 */ 419 CHILD_REQUIRE(close(openat(-1, "/", O_RDONLY)) != -1); 420 exit(0); 421 } 422 423 cap_trace_child(pid, violation, 2); 424 ATF_REQUIRE_EQ(violation[0].cap_type, CAPFAIL_NAMEI); 425 error = syscallabi(violation[0].cap_svflags); 426 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation[0].cap_code), 427 "openat"); 428 ATF_REQUIRE_STREQ(violation[0].cap_data.cap_path, "AT_FDCWD"); 429 430 ATF_REQUIRE_EQ(violation[1].cap_type, CAPFAIL_NAMEI); 431 error = syscallabi(violation[1].cap_svflags); 432 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation[1].cap_code), 433 "openat"); 434 ATF_REQUIRE_STREQ(violation[1].cap_data.cap_path, "/"); 435 } 436 437 /* 438 * Test if changing another process's cpu set is recorded as 439 * a cpuset violation. 440 */ 441 ATF_TC_WITHOUT_HEAD(ktrace__cap_cpuset); 442 ATF_TC_BODY(ktrace__cap_cpuset, tc) 443 { 444 struct ktr_cap_fail violation; 445 cpuset_t cpuset_mask = { }; 446 sigset_t set = { }; 447 pid_t pid; 448 int error; 449 450 /* Block SIGUSR1 so child does not terminate. */ 451 ATF_REQUIRE(sigaddset(&set, SIGUSR1) != -1); 452 ATF_REQUIRE(sigprocmask(SIG_BLOCK, &set, NULL) != -1); 453 454 ATF_REQUIRE((pid = fork()) != -1); 455 if (pid == 0) { 456 /* Wait until ktrace has started. */ 457 CHILD_REQUIRE(sigwait(&set, &error) != -1); 458 CHILD_REQUIRE_EQ(error, SIGUSR1); 459 /* 460 * Set cpu 0 affinity for parent process. 461 * Other process's cpu sets are restricted in capability 462 * mode, so this will raise a violation. 463 */ 464 CPU_SET(0, &cpuset_mask); 465 CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, 466 getppid(), sizeof(cpuset_mask), &cpuset_mask) != -1); 467 exit(0); 468 } 469 470 cap_trace_child(pid, &violation, 1); 471 ATF_REQUIRE_EQ(violation.cap_type, CAPFAIL_CPUSET); 472 error = syscallabi(violation.cap_svflags); 473 ATF_REQUIRE_STREQ(sysdecode_syscallname(error, violation.cap_code), 474 "cpuset_setaffinity"); 475 } 476 477 ATF_TP_ADD_TCS(tp) 478 { 479 ATF_TP_ADD_TC(tp, ktrace__cap_not_capable); 480 ATF_TP_ADD_TC(tp, ktrace__cap_increase_rights); 481 ATF_TP_ADD_TC(tp, ktrace__cap_syscall); 482 ATF_TP_ADD_TC(tp, ktrace__cap_signal); 483 ATF_TP_ADD_TC(tp, ktrace__cap_proto); 484 ATF_TP_ADD_TC(tp, ktrace__cap_sockaddr); 485 ATF_TP_ADD_TC(tp, ktrace__cap_namei); 486 ATF_TP_ADD_TC(tp, ktrace__cap_cpuset); 487 return (atf_no_error()); 488 } 489