// SPDX-License-Identifier: GPL-2.0-or-later

#define _GNU_SOURCE
#include "../kselftest_harness.h"
#include <errno.h>
#include <linux/prctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <linux/perf_event.h>
#include "vm_util.h"

FIXTURE(merge)
{
	unsigned int page_size;
	char *carveout;
	struct procmap_fd procmap;
};

FIXTURE_SETUP(merge)
{
	self->page_size = psize();
	/* Carve out a PROT_NONE region to map over. */
	self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE,
			      MAP_ANON | MAP_PRIVATE, -1, 0);
	ASSERT_NE(self->carveout, MAP_FAILED);
	/* Set up the PROCMAP_QUERY interface. */
	ASSERT_EQ(open_self_procmap(&self->procmap), 0);
}

FIXTURE_TEARDOWN(merge)
{
	ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0);
	ASSERT_EQ(close_procmap(&self->procmap), 0);
	/*
	 * Clear unconditionally, as some tests set this. It is no issue if
	 * this fails (KSM may be disabled, for instance).
	 */
	prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
}

TEST_F(merge, mprotect_unfaulted_left)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	char *ptr;

	/*
	 * Map 10 pages of R/W memory within the carveout. MAP_NORESERVE so we
	 * don't hit merge failure due to lack of the VM_ACCOUNT flag by
	 * mistake.
	 *
	 * |-----------------------|
	 * |       unfaulted       |
	 * |-----------------------|
	 */
	ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	/*
	 * Now make the first 5 pages read-only, splitting the VMA:
	 *
	 *      RO          RW
	 * |-----------|-----------|
	 * | unfaulted | unfaulted |
	 * |-----------|-----------|
	 */
	ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
	/*
	 * Fault in the first of the last 5 pages so it gets an anon_vma and
	 * thus the whole VMA becomes 'faulted':
	 *
	 *      RO          RW
	 * |-----------|-----------|
	 * | unfaulted |  faulted  |
	 * |-----------|-----------|
	 */
	ptr[5 * page_size] = 'x';
	/*
	 * Now mprotect() the RW region read-only; we should merge (though for
	 * ~15 years we did not! :):
	 *
	 *             RO
	 * |-----------------------|
	 * |        faulted        |
	 * |-----------------------|
	 */
	ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);

	/* Assert that the merge succeeded using PROCMAP_QUERY. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
}
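
/*
 * Illustrative sketch, added for exposition (not part of the original
 * tests): roughly what a find_vma_procmap()-style helper presumably does
 * underneath, namely issuing the PROCMAP_QUERY ioctl (defined in
 * <linux/fs.h> since Linux 6.11, here expected to arrive via vm_util.h)
 * against an open /proc/<pid>/maps fd. The helper name and error handling
 * are assumptions of this sketch; vm_util.h remains the authoritative
 * implementation used by the tests.
 */
#include <sys/ioctl.h>

#ifdef PROCMAP_QUERY
static int __attribute__((unused)) query_vma_span(int maps_fd, void *addr,
						  unsigned long *start,
						  unsigned long *end)
{
	struct procmap_query query = {
		.size = sizeof(query),
		.query_addr = (__u64)(unsigned long)addr,
		.query_flags = 0, /* Only report a VMA covering query_addr. */
	};

	if (ioctl(maps_fd, PROCMAP_QUERY, &query))
		return -1; /* errno is ENOENT when no VMA covers the address. */
	*start = query.vma_start;
	*end = query.vma_end;
	return 0;
}
#endif /* PROCMAP_QUERY */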

TEST_F(merge, mprotect_unfaulted_right)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	char *ptr;

	/*
	 * |-----------------------|
	 * |       unfaulted       |
	 * |-----------------------|
	 */
	ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	/*
	 * Now make the last 5 pages read-only, splitting the VMA:
	 *
	 *      RW          RO
	 * |-----------|-----------|
	 * | unfaulted | unfaulted |
	 * |-----------|-----------|
	 */
	ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
	/*
	 * Fault in the first of the first 5 pages so it gets an anon_vma and
	 * thus the whole VMA becomes 'faulted':
	 *
	 *      RW          RO
	 * |-----------|-----------|
	 * |  faulted  | unfaulted |
	 * |-----------|-----------|
	 */
	ptr[0] = 'x';
	/*
	 * Now mprotect() the RW region read-only; we should merge:
	 *
	 *             RO
	 * |-----------------------|
	 * |        faulted        |
	 * |-----------------------|
	 */
	ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);

	/* Assert that the merge succeeded using PROCMAP_QUERY. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
}

TEST_F(merge, mprotect_unfaulted_both)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	char *ptr;

	/*
	 * |-----------------------------------|
	 * |             unfaulted             |
	 * |-----------------------------------|
	 */
	ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	/*
	 * Now make the first and last 3 pages read-only, splitting the VMA:
	 *
	 *      RO          RW          RO
	 * |-----------|-----------|-----------|
	 * | unfaulted | unfaulted | unfaulted |
	 * |-----------|-----------|-----------|
	 */
	ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
	ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
	/*
	 * Fault in the first of the middle 3 pages so it gets an anon_vma and
	 * thus the whole VMA becomes 'faulted':
	 *
	 *      RO          RW          RO
	 * |-----------|-----------|-----------|
	 * | unfaulted |  faulted  | unfaulted |
	 * |-----------|-----------|-----------|
	 */
	ptr[3 * page_size] = 'x';
	/*
	 * Now mprotect() the RW region read-only; we should merge:
	 *
	 *                  RO
	 * |-----------------------------------|
	 * |              faulted              |
	 * |-----------------------------------|
	 */
	ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);

	/* Assert that the merge succeeded using PROCMAP_QUERY. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
}
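
/*
 * Convenience sketch, added for illustration (not in the original tests):
 * the repeated three-step "did this range end up as a single VMA?" check
 * used throughout this file could be wrapped as below. It is a macro rather
 * than a function so the harness's ASSERT_*() macros still expand inside
 * TEST_F() scope; the macro name is an assumption of this sketch.
 */
#define ASSERT_SPANS_SINGLE_VMA(procmap, ptr, nr_pages, page_size)	\
	do {								\
		ASSERT_TRUE(find_vma_procmap((procmap), (ptr)));	\
		ASSERT_EQ((procmap)->query.vma_start,			\
			  (unsigned long)(ptr));			\
		ASSERT_EQ((procmap)->query.vma_end,			\
			  (unsigned long)(ptr) +			\
				  (nr_pages) * (page_size));		\
	} while (0)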

TEST_F(merge, mprotect_faulted_left_unfaulted_right)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	char *ptr;

	/*
	 * |-----------------------------------|
	 * |             unfaulted             |
	 * |-----------------------------------|
	 */
	ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	/*
	 * Now make the last 3 pages read-only, splitting the VMA:
	 *
	 *            RW               RO
	 * |-----------------------|-----------|
	 * |       unfaulted       | unfaulted |
	 * |-----------------------|-----------|
	 */
	ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
	/*
	 * Fault in the first of the first 6 pages so it gets an anon_vma and
	 * thus the whole of the left-hand VMA becomes 'faulted':
	 *
	 *            RW               RO
	 * |-----------------------|-----------|
	 * |        faulted        | unfaulted |
	 * |-----------------------|-----------|
	 */
	ptr[0] = 'x';
	/*
	 * Now make the first 3 pages read-only, splitting the VMA:
	 *
	 *      RO          RW          RO
	 * |-----------|-----------|-----------|
	 * |  faulted  |  faulted  | unfaulted |
	 * |-----------|-----------|-----------|
	 */
	ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
	/*
	 * Now mprotect() the RW region read-only; we should merge:
	 *
	 *                  RO
	 * |-----------------------------------|
	 * |              faulted              |
	 * |-----------------------------------|
	 */
	ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);

	/* Assert that the merge succeeded using PROCMAP_QUERY. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
}
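
/*
 * Illustrative fallback sketch (not in the original tests; assumes the
 * textual "start-end ..." layout of /proc/<pid>/maps): count how many VMAs
 * intersect [addr, addr + len). After a successful merge such as the ones
 * asserted above, the whole range should be covered by exactly one entry.
 * Returns -1 on error.
 */
static int __attribute__((unused)) count_vmas_in_range(void *addr, size_t len)
{
	unsigned long range_start = (unsigned long)addr;
	unsigned long range_end = range_start + len;
	unsigned long start, end;
	char line[1024];
	int count = 0;
	FILE *fp = fopen("/proc/self/maps", "r");

	if (!fp)
		return -1;
	while (fgets(line, sizeof(line), fp)) {
		/* Each mapping line begins with "start-end ". */
		if (sscanf(line, "%lx-%lx ", &start, &end) != 2)
			continue;
		if (start < range_end && end > range_start)
			count++;
	}
	fclose(fp);
	return count;
}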

TEST_F(merge, mprotect_unfaulted_left_faulted_right)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	char *ptr;

	/*
	 * |-----------------------------------|
	 * |             unfaulted             |
	 * |-----------------------------------|
	 */
	ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	/*
	 * Now make the first 3 pages read-only, splitting the VMA:
	 *
	 *      RO                RW
	 * |-----------|-----------------------|
	 * | unfaulted |       unfaulted       |
	 * |-----------|-----------------------|
	 */
	ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
	/*
	 * Fault in the first of the last 6 pages so it gets an anon_vma and
	 * thus the whole of the right-hand VMA becomes 'faulted':
	 *
	 *      RO                RW
	 * |-----------|-----------------------|
	 * | unfaulted |        faulted        |
	 * |-----------|-----------------------|
	 */
	ptr[3 * page_size] = 'x';
	/*
	 * Now make the last 3 pages read-only, splitting the VMA:
	 *
	 *      RO          RW          RO
	 * |-----------|-----------|-----------|
	 * | unfaulted |  faulted  |  faulted  |
	 * |-----------|-----------|-----------|
	 */
	ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
	/*
	 * Now mprotect() the RW region read-only; we should merge:
	 *
	 *                  RO
	 * |-----------------------------------|
	 * |              faulted              |
	 * |-----------------------------------|
	 */
	ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);

	/* Assert that the merge succeeded using PROCMAP_QUERY. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
}

TEST_F(merge, forked_target_vma)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	pid_t pid;
	char *ptr, *ptr2;
	int i;

	/*
	 * |-----------|
	 * | unfaulted |
	 * |-----------|
	 */
	ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);

	/*
	 * Fault in the mapping prior to forking.
	 *
	 * |-----------|
	 * |  faulted  |
	 * |-----------|
	 */
	ptr[0] = 'x';

	pid = fork();
	ASSERT_NE(pid, -1);

	if (pid != 0) {
		wait(NULL);
		return;
	}

	/* Child process below: */

	/* Reopen for child. */
	ASSERT_EQ(close_procmap(&self->procmap), 0);
	ASSERT_EQ(open_self_procmap(&self->procmap), 0);

	/* unCOWing everything does not cause the AVC (anon_vma_chain) to go away. */
	for (i = 0; i < 5 * page_size; i += page_size)
		ptr[i] = 'x';

	/*
	 * Map in an adjacent VMA in the child.
	 *
	 *     forked
	 * |-----------|-----------|
	 * |  faulted  | unfaulted |
	 * |-----------|-----------|
	 *      ptr         ptr2
	 */
	ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr2, MAP_FAILED);

	/* Make sure the VMAs were not merged. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size);
}
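
/*
 * Sketch, added for illustration (not part of the original tests): the
 * fork()-based tests above simply wait(NULL) and return in the parent, so
 * the child's exit status (which is how a failed child-side assertion would
 * manifest) is discarded. A stricter parent could propagate it as below;
 * the helper name is an assumption of this sketch.
 */
static int __attribute__((unused)) wait_child_and_check(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) != pid)
		return -1;
	/* Treat anything but a clean exit(0) as a child-side failure. */
	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}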

TEST_F(merge, forked_source_vma)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	pid_t pid;
	char *ptr, *ptr2;
	int i;

	/*
	 * |-----------|------------|
	 * | unfaulted | <unmapped> |
	 * |-----------|------------|
	 */
	ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);

	/*
	 * Fault in the mapping prior to forking.
	 *
	 * |-----------|------------|
	 * |  faulted  | <unmapped> |
	 * |-----------|------------|
	 */
	ptr[0] = 'x';

	pid = fork();
	ASSERT_NE(pid, -1);

	if (pid != 0) {
		wait(NULL);
		return;
	}

	/* Child process below: */

	/* Reopen for child. */
	ASSERT_EQ(close_procmap(&self->procmap), 0);
	ASSERT_EQ(open_self_procmap(&self->procmap), 0);

	/* unCOWing everything does not cause the AVC (anon_vma_chain) to go away. */
	for (i = 0; i < 5 * page_size; i += page_size)
		ptr[i] = 'x';

	/*
	 * Map in an adjacent VMA in the child, with ptr2 after ptr, but
	 * incompatible protection bits:
	 *
	 *   forked RW       RWX
	 * |-----------|-----------|
	 * |  faulted  | unfaulted |
	 * |-----------|-----------|
	 *      ptr         ptr2
	 */
	ptr2 = mmap(&carveout[6 * page_size], 5 * page_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
	ASSERT_NE(ptr2, MAP_FAILED);

	/* Make sure the VMAs were not merged. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);

	/*
	 * Now mprotect() the forked region to RWX so it becomes the source
	 * for the merge to the unfaulted region:
	 *
	 *  forked RWX       RWX
	 * |-----------|-----------|
	 * |  faulted  | unfaulted |
	 * |-----------|-----------|
	 *      ptr         ptr2
	 *
	 * This should NOT result in a merge, as ptr was forked.
	 */
	ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0);

	/* Again, make sure the VMAs were not merged. */
	ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
}
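
/*
 * Sketch, for exposition only (the helper name and wrapper shape are this
 * sketch's assumptions): perf_event_open() has no glibc wrapper, hence the
 * raw syscall in the uprobe test below. For the dynamic "uprobe" PMU,
 * attr.config1 carries a pointer to the probed file's path and attr.config2
 * the file offset to probe. The returned fd should eventually be close()d,
 * which also removes the probe.
 */
static int __attribute__((unused)) uprobe_event_open(const char *path,
						     __u64 offset, __u32 type)
{
	struct perf_event_attr attr = {
		.size = sizeof(attr),
		/* From /sys/bus/event_source/devices/uprobe/type. */
		.type = type,
		.config1 = (__u64)(unsigned long)path,
		.config2 = offset,
	};

	/* pid == 0: this process; cpu == -1: any CPU; no group, no flags. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}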

TEST_F(merge, handle_uprobe_upon_merged_vma)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	unsigned int page_size = self->page_size;
	const char *probe_file = "./foo";
	char *carveout = self->carveout;
	struct perf_event_attr attr;
	unsigned long type;
	void *ptr1, *ptr2;
	int fd;

	fd = open(probe_file, O_RDWR | O_CREAT, 0600);
	ASSERT_GE(fd, 0);

	ASSERT_EQ(ftruncate(fd, page_size), 0);
	if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) {
		SKIP(goto out, "Failed to read uprobe sysfs file, skipping");
	}

	memset(&attr, 0, attr_sz);
	attr.size = attr_sz;
	attr.type = type;
	attr.config1 = (__u64)(long)probe_file;
	attr.config2 = 0x0; /* Probe at file offset 0. */

	ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);

	ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
		    MAP_PRIVATE | MAP_FIXED, fd, 0);
	ASSERT_NE(ptr1, MAP_FAILED);

	/* Move (and grow) the first page to the middle of the mapping. */
	ptr2 = mremap(ptr1, page_size, 2 * page_size,
		      MREMAP_MAYMOVE | MREMAP_FIXED,
		      (char *)ptr1 + 5 * page_size);
	ASSERT_NE(ptr2, MAP_FAILED);

	/*
	 * Now move a page back to the start; this should merge with the
	 * adjacent VMA while the uprobe is registered on the backing file.
	 */
	ASSERT_NE(mremap(ptr2, page_size, page_size,
			 MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);

out:
	close(fd);
	remove(probe_file);
}
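
/*
 * Illustrative sketch (not in the original tests; the parsing is an
 * assumption about the /proc/<pid>/smaps text format): once
 * PR_SET_MEMORY_MERGE is enabled, VM_MERGEABLE shows up as the "mg" flag in
 * an entry's VmFlags line, giving an alternative way to observe what the
 * ksm_merge test below relies on. Returns 1 if mergeable, 0 if not, -1 on
 * error.
 */
static int __attribute__((unused)) vma_is_mergeable(void *addr)
{
	unsigned long target = (unsigned long)addr;
	unsigned long start, end;
	int in_target = 0, ret = -1;
	char line[1024];
	FILE *fp = fopen("/proc/self/smaps", "r");

	if (!fp)
		return -1;
	while (fgets(line, sizeof(line), fp)) {
		/* Range lines look like "start-end perms ...". */
		if (sscanf(line, "%lx-%lx ", &start, &end) == 2) {
			in_target = start <= target && target < end;
		} else if (in_target && !strncmp(line, "VmFlags:", 8)) {
			ret = strstr(line, " mg") != NULL;
			break;
		}
	}
	fclose(fp);
	return ret;
}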

TEST_F(merge, ksm_merge)
{
	unsigned int page_size = self->page_size;
	char *carveout = self->carveout;
	struct procmap_fd *procmap = &self->procmap;
	char *ptr, *ptr2;
	int err;

	/*
	 * Map two R/W VMAs immediately adjacent to one another; they should
	 * trivially merge:
	 *
	 * |-----------|-----------|
	 * |    R/W    |    R/W    |
	 * |-----------|-----------|
	 *      ptr         ptr2
	 */
	ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	ptr2 = mmap(&carveout[2 * page_size], page_size,
		    PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr2, MAP_FAILED);
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size);

	/* Unmap the second half of this merged VMA. */
	ASSERT_EQ(munmap(ptr2, page_size), 0);

	/* OK, now enable global KSM merge. We clear this on test teardown. */
	err = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
	if (err == -1) {
		int errnum = errno;

		/* Only non-failure case... */
		ASSERT_EQ(errnum, EINVAL);
		/* ...but indicates we should skip. */
		SKIP(return, "KSM memory merging not supported, skipping.");
	}

	/*
	 * Now map a VMA adjacent to the existing one that was just made
	 * VM_MERGEABLE; this should merge as well.
	 */
	ptr2 = mmap(&carveout[2 * page_size], page_size,
		    PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr2, MAP_FAILED);
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size);

	/* Now unmap this VMA altogether. */
	ASSERT_EQ(munmap(ptr, 2 * page_size), 0);

	/* Try the same operation as before, asserting this also merges fine. */
	ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr, MAP_FAILED);
	ptr2 = mmap(&carveout[2 * page_size], page_size,
		    PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	ASSERT_NE(ptr2, MAP_FAILED);
	ASSERT_TRUE(find_vma_procmap(procmap, ptr));
	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size);
}

TEST_HARNESS_MAIN