1 /* 2 * Copyright (c) 2021 Dell Inc. or its subsidiaries. All Rights Reserved. 3 * Copyright (c) 2022 The FreeBSD Foundation 4 * 5 * Portions of this software were developed by Mark Johnston under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * Test behavior when a mapping of a shared shadow vm object is 32 * invalidated by COW from another mapping. In particular, when 33 * minherit(INHERT_SHARE) is applied to a COW mapping, a subsequently 34 * forked child process will share the parent's shadow object. Thus, 35 * pages already mapped into one sharing process may be written from 36 * another, triggering a copy into the shadow object. The VM system 37 * expects that a fully shadowed page is unmapped, but at one point the 38 * use of a shared shadow object could break this invariant. 39 * 40 * This is a regression test for an issue isolated by rlibby@FreeBSD.org 41 * from an issue detected by stress2's collapse.sh by jeff@FreeBSD.org. 42 * The issue became CVE-2021-29626. 43 * 44 * This file is written as an ATF test suite but may be compiled as a 45 * standalone program with -DSTANDALONE (and optionally -DDEBUG). 46 */ 47 48 #include <sys/param.h> 49 #include <sys/mman.h> 50 #include <sys/procctl.h> 51 #include <sys/resource.h> 52 #include <sys/sysctl.h> 53 #include <sys/wait.h> 54 55 #include <machine/atomic.h> 56 57 #include <err.h> 58 #include <errno.h> 59 #include <stdbool.h> 60 #include <stddef.h> 61 #include <stdio.h> 62 #include <stdlib.h> 63 #include <unistd.h> 64 65 #ifdef STANDALONE 66 #define ATF_REQUIRE(x) do { \ 67 if (!(x)) \ 68 errx(1, "%s", #x); \ 69 } while (0) 70 #else 71 #include <atf-c.h> 72 #endif 73 74 #ifdef DEBUG 75 #define dprintf(...) printf(__VA_ARGS__) 76 #else 77 #define dprintf(...) 78 #endif 79 80 #define DEPTH 5 81 82 #define FLAG_COLLAPSE 0x1 83 #define FLAG_BLOCK_XFER 0x2 84 #define FLAG_FULLMOD 0x4 85 #define FLAG_MASK (FLAG_COLLAPSE | FLAG_BLOCK_XFER | FLAG_FULLMOD) 86 87 struct shared_state { 88 void *p; 89 size_t len; 90 size_t modlen; 91 size_t pagesize; 92 bool collapse; 93 bool block_xfer; 94 bool lazy_cow; 95 bool okay; 96 volatile bool exiting[DEPTH]; 97 volatile bool exit; 98 volatile bool p3_did_write; 99 }; 100 101 /* 102 * Program flow. There are three or four processes that are descendants 103 * of the process running the test (P0), where arrows go from parents to 104 * children, and thicker arrows indicate sharing a certain memory region 105 * without COW semantics: 106 * P0 -> P1 -> P2 => P3 107 * \=> P4 108 * The main idea is that P1 maps a memory region, and that region is 109 * shared with P2/P3, but with COW semantics. When P3 modifies the 110 * memory, P2 ought to see that modification. P4 optionally exists to 111 * defeat a COW optimization. 112 */ 113 114 #define child_err(...) do { \ 115 ss->exit = true; \ 116 err(1, __VA_ARGS__); \ 117 } while (0) 118 119 #define child_errx(...) do { \ 120 ss->exit = true; \ 121 errx(1, __VA_ARGS__); \ 122 } while (0) 123 124 #define SLEEP_TIME_US 1000 125 126 static void child(struct shared_state *ss, int depth); 127 128 static pid_t 129 child_fork(struct shared_state *ss, int depth) 130 { 131 pid_t pid = fork(); 132 if (pid == -1) 133 child_err("fork"); 134 else if (pid == 0) 135 child(ss, depth); 136 return pid; 137 } 138 139 static void 140 child_fault(struct shared_state *ss) 141 { 142 size_t i; 143 144 for (i = 0; i < ss->len; i += ss->pagesize) 145 (void)((volatile char *)ss->p)[i]; 146 } 147 148 static void 149 child_write(struct shared_state *ss, int val, size_t len) 150 { 151 size_t i; 152 153 for (i = 0; i < len; i += ss->pagesize) 154 ((int *)ss->p)[i / sizeof(int)] = val; 155 atomic_thread_fence_rel(); 156 } 157 158 static void 159 child_wait_p3_write(struct shared_state *ss) 160 { 161 while (!ss->p3_did_write) { 162 if (ss->exit) 163 exit(1); 164 usleep(SLEEP_TIME_US); 165 } 166 atomic_thread_fence_acq(); 167 } 168 169 static void 170 child_verify(struct shared_state *ss, int depth, int newval, int oldval) 171 { 172 size_t i; 173 int expectval, foundval; 174 175 for (i = 0; i < ss->len; i += ss->pagesize) { 176 expectval = i < ss->modlen ? newval : oldval; 177 foundval = ((int *)ss->p)[i / sizeof(int)]; 178 if (foundval == expectval) 179 continue; 180 child_errx("P%d saw %d but expected %d, %d was the old value", 181 depth, foundval, expectval, oldval); 182 } 183 } 184 185 static void 186 child(struct shared_state *ss, int depth) 187 { 188 pid_t mypid, oldval, pid; 189 190 if (depth < 1 || depth >= DEPTH) 191 child_errx("Bad depth %d", depth); 192 mypid = getpid(); 193 dprintf("P%d (pid %d) started\n", depth, mypid); 194 switch (depth) { 195 case 1: 196 /* Shared memory undergoing test. */ 197 ss->p = mmap(NULL, ss->len, PROT_READ | PROT_WRITE, 198 MAP_SHARED | MAP_ANON, -1, 0); 199 if (ss->p == MAP_FAILED) 200 child_err("mmap"); 201 202 /* P1 stamps the shared memory. */ 203 child_write(ss, mypid, ss->len); 204 if (!ss->lazy_cow) { 205 if (mlock(ss->p, ss->len) == -1) 206 child_err("mlock"); 207 if (mprotect(ss->p, ss->len, PROT_READ) == -1) 208 child_err("mprotect"); 209 } 210 if (ss->block_xfer) { 211 /* 212 * P4 is forked so that its existence blocks a page COW 213 * path where the page is simply transferred between 214 * objects, rather than being copied. 215 */ 216 child_fork(ss, 4); 217 } 218 /* 219 * P1 specifies that modifications from its child processes not 220 * be shared with P1. Child process reads can be serviced from 221 * pages in P1's object, but writes must be COW'd. 222 */ 223 if (minherit(ss->p, ss->len, INHERIT_COPY) != 0) 224 child_err("minherit"); 225 /* Fork P2. */ 226 child_fork(ss, depth + 1); 227 /* P1 and P4 wait for P3's writes before exiting. */ 228 child_wait_p3_write(ss); 229 child_verify(ss, depth, mypid, mypid); 230 if (!ss->collapse) { 231 /* Hang around to prevent collapse. */ 232 while (!ss->exit) 233 usleep(SLEEP_TIME_US); 234 } 235 /* Exit so the P2 -> P1/P4 shadow chain can collapse. */ 236 break; 237 case 2: 238 /* 239 * P2 now specifies that modifications from its child processes 240 * be shared. P2 and P3 will share a shadow object. 241 */ 242 if (minherit(ss->p, ss->len, INHERIT_SHARE) != 0) 243 child_err("minherit"); 244 245 /* 246 * P2 faults a page in P1's object before P1 exits and the 247 * shadow chain is collapsed. This may be redundant if the 248 * (read-only) mappings were copied by fork(), but it doesn't 249 * hurt. 250 */ 251 child_fault(ss); 252 oldval = atomic_load_acq_int(ss->p); 253 254 /* Fork P3. */ 255 pid = child_fork(ss, depth + 1); 256 if (ss->collapse) { 257 /* Wait for P1 and P4 to exit, triggering collapse. */ 258 while (!ss->exiting[1] || 259 (ss->block_xfer && !ss->exiting[4])) 260 usleep(SLEEP_TIME_US); 261 /* 262 * This is racy, just guess at how long it may take 263 * them to finish exiting. 264 */ 265 usleep(100 * 1000); 266 } 267 /* P2 waits for P3's modification. */ 268 child_wait_p3_write(ss); 269 child_verify(ss, depth, pid, oldval); 270 ss->okay = true; 271 ss->exit = true; 272 break; 273 case 3: 274 /* 275 * Use mlock()+mprotect() to trigger the COW. This 276 * exercises a different COW handler than the one used 277 * for lazy faults. 278 */ 279 if (!ss->lazy_cow) { 280 if (mlock(ss->p, ss->len) == -1) 281 child_err("mlock"); 282 if (mprotect(ss->p, ss->len, PROT_READ | PROT_WRITE) == 283 -1) 284 child_err("mprotect"); 285 } 286 287 /* 288 * P3 writes the memory. A page is faulted into the shared 289 * P2/P3 shadow object. P2's mapping of the page in P1's 290 * object must now be shot down, or else P2 will wrongly 291 * continue to have that page mapped. 292 */ 293 child_write(ss, mypid, ss->modlen); 294 ss->p3_did_write = true; 295 dprintf("P3 (pid %d) wrote its pid\n", mypid); 296 break; 297 case 4: 298 /* Just hang around until P3 is done writing. */ 299 oldval = atomic_load_acq_int(ss->p); 300 child_wait_p3_write(ss); 301 child_verify(ss, depth, oldval, oldval); 302 break; 303 default: 304 child_errx("Bad depth %d", depth); 305 } 306 307 dprintf("P%d (pid %d) exiting\n", depth, mypid); 308 ss->exiting[depth] = true; 309 exit(0); 310 } 311 312 static void 313 do_one_shared_shadow_inval(bool lazy_cow, size_t pagesize, size_t len, 314 unsigned int flags) 315 { 316 struct shared_state *ss; 317 pid_t pid; 318 int status; 319 320 pid = getpid(); 321 322 dprintf("P0 (pid %d) %s(collapse=%d, block_xfer=%d, full_mod=%d)\n", 323 pid, __func__, (int)collapse, (int)block_xfer, (int)full_mod); 324 325 ATF_REQUIRE(procctl(P_PID, pid, PROC_REAP_ACQUIRE, NULL) == 0); 326 327 /* Shared memory for coordination. */ 328 ss = mmap(NULL, sizeof(*ss), PROT_READ | PROT_WRITE, 329 MAP_SHARED | MAP_ANON, -1, 0); 330 ATF_REQUIRE(ss != MAP_FAILED); 331 332 ss->len = len; 333 ss->modlen = (flags & FLAG_FULLMOD) ? ss->len : ss->len / 2; 334 ss->pagesize = pagesize; 335 ss->collapse = (flags & FLAG_COLLAPSE) != 0; 336 ss->block_xfer = (flags & FLAG_BLOCK_XFER) != 0; 337 ss->lazy_cow = lazy_cow; 338 339 pid = fork(); 340 ATF_REQUIRE(pid != -1); 341 if (pid == 0) 342 child(ss, 1); 343 344 /* Wait for all descendants to exit. */ 345 do { 346 pid = wait(&status); 347 ATF_REQUIRE(WIFEXITED(status)); 348 } while (pid != -1 || errno != ECHILD); 349 350 atomic_thread_fence_acq(); 351 ATF_REQUIRE(ss->okay); 352 353 ATF_REQUIRE(munmap(ss, sizeof(*ss)) == 0); 354 ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_RELEASE, NULL) == 0); 355 } 356 357 static void 358 do_shared_shadow_inval(bool lazy_cow) 359 { 360 size_t largepagesize, pagesize, pagesizes[MAXPAGESIZES], sysctllen; 361 362 sysctllen = sizeof(pagesizes); 363 ATF_REQUIRE(sysctlbyname("hw.pagesizes", pagesizes, &sysctllen, NULL, 364 0) == 0); 365 ATF_REQUIRE(sysctllen >= sizeof(size_t)); 366 367 pagesize = pagesizes[0]; 368 largepagesize = MAXPAGESIZES >= 2 && 369 sysctllen >= 2 * sizeof(size_t) && pagesizes[1] != 0 ? 370 pagesizes[1] : 2 * 1024 * 1024; 371 372 for (unsigned int i = 0; i <= FLAG_MASK; i++) { 373 do_one_shared_shadow_inval(lazy_cow, pagesize, 374 pagesize, i); 375 do_one_shared_shadow_inval(lazy_cow, pagesize, 376 2 * pagesize, i); 377 do_one_shared_shadow_inval(lazy_cow, pagesize, 378 largepagesize - pagesize, i); 379 do_one_shared_shadow_inval(lazy_cow, pagesize, 380 largepagesize, i); 381 do_one_shared_shadow_inval(lazy_cow, pagesize, 382 largepagesize + pagesize, i); 383 } 384 } 385 386 static void 387 do_shared_shadow_inval_eager(void) 388 { 389 struct rlimit rl; 390 391 rl.rlim_cur = rl.rlim_max = RLIM_INFINITY; 392 ATF_REQUIRE(setrlimit(RLIMIT_MEMLOCK, &rl) == 0); 393 394 do_shared_shadow_inval(false); 395 } 396 397 static void 398 do_shared_shadow_inval_lazy(void) 399 { 400 do_shared_shadow_inval(true); 401 } 402 403 #ifdef STANDALONE 404 int 405 main(void) 406 { 407 do_shared_shadow_inval_lazy(); 408 do_shared_shadow_inval_eager(); 409 printf("pass\n"); 410 } 411 #else 412 ATF_TC_WITHOUT_HEAD(shared_shadow_inval__lazy_cow); 413 ATF_TC_BODY(shared_shadow_inval__lazy_cow, tc) 414 { 415 do_shared_shadow_inval_lazy(); 416 } 417 418 ATF_TC(shared_shadow_inval__eager_cow); 419 ATF_TC_HEAD(shared_shadow_inval__eager_cow, tc) 420 { 421 /* Needed to raise the mlock() limit. */ 422 atf_tc_set_md_var(tc, "require.user", "root"); 423 } 424 ATF_TC_BODY(shared_shadow_inval__eager_cow, tc) 425 { 426 do_shared_shadow_inval_eager(); 427 } 428 429 ATF_TP_ADD_TCS(tp) 430 { 431 ATF_TP_ADD_TC(tp, shared_shadow_inval__lazy_cow); 432 ATF_TP_ADD_TC(tp, shared_shadow_inval__eager_cow); 433 return (atf_no_error()); 434 } 435 #endif /* !STANDALONE */ 436