1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * GUP long-term page pinning tests. 4 * 5 * Copyright 2023, Red Hat, Inc. 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 #define _GNU_SOURCE 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <unistd.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <assert.h> 18 #include <sys/mman.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 #include <linux/magic.h> 22 #include <linux/memfd.h> 23 24 #include "local_config.h" 25 #ifdef LOCAL_CONFIG_HAVE_LIBURING 26 #include <liburing.h> 27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28 29 #include "../../../../mm/gup_test.h" 30 #include "../kselftest.h" 31 #include "vm_util.h" 32 33 static size_t pagesize; 34 static int nr_hugetlbsizes; 35 static size_t hugetlbsizes[10]; 36 static int gup_fd; 37 38 static __fsword_t get_fs_type(int fd) 39 { 40 struct statfs fs; 41 int ret; 42 43 do { 44 ret = fstatfs(fd, &fs); 45 } while (ret && errno == EINTR); 46 47 return ret ? 0 : fs.f_type; 48 } 49 50 static bool fs_is_unknown(__fsword_t fs_type) 51 { 52 /* 53 * We only support some filesystems in our tests when dealing with 54 * R/W long-term pinning. For these filesystems, we can be fairly sure 55 * whether they support it or not. 56 */ 57 switch (fs_type) { 58 case TMPFS_MAGIC: 59 case HUGETLBFS_MAGIC: 60 case BTRFS_SUPER_MAGIC: 61 case EXT4_SUPER_MAGIC: 62 case XFS_SUPER_MAGIC: 63 return false; 64 default: 65 return true; 66 } 67 } 68 69 static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type) 70 { 71 assert(!fs_is_unknown(fs_type)); 72 switch (fs_type) { 73 case TMPFS_MAGIC: 74 case HUGETLBFS_MAGIC: 75 return true; 76 default: 77 return false; 78 } 79 } 80 81 enum test_type { 82 TEST_TYPE_RO, 83 TEST_TYPE_RO_FAST, 84 TEST_TYPE_RW, 85 TEST_TYPE_RW_FAST, 86 #ifdef LOCAL_CONFIG_HAVE_LIBURING 87 TEST_TYPE_IOURING, 88 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 89 }; 90 91 static void do_test(int fd, size_t size, enum test_type type, bool shared) 92 { 93 __fsword_t fs_type = get_fs_type(fd); 94 bool should_work; 95 char *mem; 96 int result = KSFT_PASS; 97 int ret; 98 99 if (fd < 0) { 100 result = KSFT_FAIL; 101 goto report; 102 } 103 104 if (ftruncate(fd, size)) { 105 if (errno == ENOENT) { 106 skip_test_dodgy_fs("ftruncate()"); 107 } else { 108 ksft_print_msg("ftruncate() failed (%s)\n", 109 strerror(errno)); 110 result = KSFT_FAIL; 111 goto report; 112 } 113 return; 114 } 115 116 if (fallocate(fd, 0, 0, size)) { 117 /* 118 * Some filesystems (eg, NFSv3) don't support 119 * fallocate(), report this as a skip rather than a 120 * test failure. 121 */ 122 if (errno == EOPNOTSUPP) { 123 ksft_print_msg("fallocate() not supported by filesystem\n"); 124 result = KSFT_SKIP; 125 } else if (size == pagesize) { 126 ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); 127 result = KSFT_FAIL; 128 } else { 129 ksft_print_msg("need more free huge pages\n"); 130 result = KSFT_SKIP; 131 } 132 goto report; 133 } 134 135 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, 136 shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); 137 if (mem == MAP_FAILED) { 138 if (size == pagesize || shared) { 139 ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); 140 result = KSFT_FAIL; 141 } else { 142 ksft_print_msg("need more free huge pages\n"); 143 result = KSFT_SKIP; 144 } 145 goto report; 146 } 147 148 /* Fault in the page such that GUP-fast can pin it directly. */ 149 memset(mem, 0, size); 150 151 switch (type) { 152 case TEST_TYPE_RO: 153 case TEST_TYPE_RO_FAST: 154 /* 155 * Cover more cases regarding unsharing decisions when 156 * long-term R/O pinning by mapping the page R/O. 157 */ 158 ret = mprotect(mem, size, PROT_READ); 159 if (ret) { 160 ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); 161 result = KSFT_FAIL; 162 goto munmap; 163 } 164 /* FALLTHROUGH */ 165 case TEST_TYPE_RW: 166 case TEST_TYPE_RW_FAST: { 167 struct pin_longterm_test args; 168 const bool fast = type == TEST_TYPE_RO_FAST || 169 type == TEST_TYPE_RW_FAST; 170 const bool rw = type == TEST_TYPE_RW || 171 type == TEST_TYPE_RW_FAST; 172 173 if (gup_fd < 0) { 174 ksft_print_msg("gup_test not available\n"); 175 result = KSFT_SKIP; 176 break; 177 } 178 179 if (rw && shared && fs_is_unknown(fs_type)) { 180 ksft_print_msg("Unknown filesystem\n"); 181 result = KSFT_SKIP; 182 return; 183 } 184 /* 185 * R/O pinning or pinning in a private mapping is always 186 * expected to work. Otherwise, we expect long-term R/W pinning 187 * to only succeed for special filesystems. 188 */ 189 should_work = !shared || !rw || 190 fs_supports_writable_longterm_pinning(fs_type); 191 192 args.addr = (__u64)(uintptr_t)mem; 193 args.size = size; 194 args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 195 args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; 196 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 197 if (ret && errno == EINVAL) { 198 ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n"); 199 result = KSFT_SKIP; 200 break; 201 } else if (ret && errno == EFAULT) { 202 if (should_work) 203 result = KSFT_FAIL; 204 else 205 result = KSFT_PASS; 206 break; 207 } else if (ret) { 208 ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", 209 strerror(errno)); 210 result = KSFT_FAIL; 211 break; 212 } 213 214 if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) 215 ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n", 216 strerror(errno)); 217 218 /* 219 * TODO: if the kernel ever supports long-term R/W pinning on 220 * some previously unsupported filesystems, we might want to 221 * perform some additional tests for possible data corruptions. 222 */ 223 if (should_work) 224 result = KSFT_PASS; 225 else 226 result = KSFT_FAIL; 227 break; 228 } 229 #ifdef LOCAL_CONFIG_HAVE_LIBURING 230 case TEST_TYPE_IOURING: { 231 struct io_uring ring; 232 struct iovec iov; 233 234 /* io_uring always pins pages writable. */ 235 if (shared && fs_is_unknown(fs_type)) { 236 ksft_print_msg("Unknown filesystem\n"); 237 result = KSFT_SKIP; 238 goto report; 239 } 240 should_work = !shared || 241 fs_supports_writable_longterm_pinning(fs_type); 242 243 /* Skip on errors, as we might just lack kernel support. */ 244 ret = io_uring_queue_init(1, &ring, 0); 245 if (ret < 0) { 246 ksft_print_msg("io_uring_queue_init() failed (%s)\n", 247 strerror(-ret)); 248 result = KSFT_SKIP; 249 break; 250 } 251 /* 252 * Register the range as a fixed buffer. This will FOLL_WRITE | 253 * FOLL_PIN | FOLL_LONGTERM the range. 254 */ 255 iov.iov_base = mem; 256 iov.iov_len = size; 257 ret = io_uring_register_buffers(&ring, &iov, 1); 258 /* Only new kernels return EFAULT. */ 259 if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || 260 errno == EFAULT)) { 261 if (should_work) { 262 ksft_print_msg("Should have failed (%s)\n", 263 strerror(errno)); 264 result = KSFT_FAIL; 265 } else { 266 result = KSFT_PASS; 267 } 268 } else if (ret) { 269 /* 270 * We might just lack support or have insufficient 271 * MEMLOCK limits. 272 */ 273 ksft_print_msg("io_uring_register_buffers() failed (%s)\n", 274 strerror(-ret)); 275 result = KSFT_SKIP; 276 } else { 277 if (should_work) { 278 result = KSFT_PASS; 279 } else { 280 ksft_print_msg("Should have worked\n"); 281 result = KSFT_FAIL; 282 } 283 io_uring_unregister_buffers(&ring); 284 } 285 286 io_uring_queue_exit(&ring); 287 break; 288 } 289 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 290 default: 291 assert(false); 292 } 293 294 munmap: 295 munmap(mem, size); 296 report: 297 log_test_result(result); 298 } 299 300 typedef void (*test_fn)(int fd, size_t size); 301 302 static void run_with_memfd(test_fn fn, const char *desc) 303 { 304 int fd; 305 306 log_test_start("%s ... with memfd", desc); 307 308 fd = memfd_create("test", 0); 309 if (fd < 0) { 310 ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); 311 log_test_result(KSFT_SKIP); 312 return; 313 } 314 315 fn(fd, pagesize); 316 close(fd); 317 } 318 319 static void run_with_tmpfile(test_fn fn, const char *desc) 320 { 321 FILE *file; 322 int fd; 323 324 log_test_start("%s ... with tmpfile", desc); 325 326 file = tmpfile(); 327 if (!file) { 328 ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); 329 fd = -1; 330 } else { 331 fd = fileno(file); 332 if (fd < 0) { 333 ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); 334 } 335 } 336 337 fn(fd, pagesize); 338 339 if (file) 340 fclose(file); 341 } 342 343 static void run_with_local_tmpfile(test_fn fn, const char *desc) 344 { 345 char filename[] = __FILE__"_tmpfile_XXXXXX"; 346 int fd; 347 348 log_test_start("%s ... with local tmpfile", desc); 349 350 fd = mkstemp(filename); 351 if (fd < 0) 352 ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); 353 354 if (unlink(filename)) { 355 ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); 356 close(fd); 357 fd = -1; 358 } 359 360 fn(fd, pagesize); 361 362 if (fd >= 0) 363 close(fd); 364 } 365 366 static void run_with_memfd_hugetlb(test_fn fn, const char *desc, 367 size_t hugetlbsize) 368 { 369 int flags = MFD_HUGETLB; 370 int fd; 371 372 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, 373 hugetlbsize / 1024); 374 375 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 376 377 fd = memfd_create("test", flags); 378 if (fd < 0) { 379 ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); 380 log_test_result(KSFT_SKIP); 381 return; 382 } 383 384 fn(fd, hugetlbsize); 385 close(fd); 386 } 387 388 struct test_case { 389 const char *desc; 390 test_fn fn; 391 }; 392 393 static void test_shared_rw_pin(int fd, size_t size) 394 { 395 do_test(fd, size, TEST_TYPE_RW, true); 396 } 397 398 static void test_shared_rw_fast_pin(int fd, size_t size) 399 { 400 do_test(fd, size, TEST_TYPE_RW_FAST, true); 401 } 402 403 static void test_shared_ro_pin(int fd, size_t size) 404 { 405 do_test(fd, size, TEST_TYPE_RO, true); 406 } 407 408 static void test_shared_ro_fast_pin(int fd, size_t size) 409 { 410 do_test(fd, size, TEST_TYPE_RO_FAST, true); 411 } 412 413 static void test_private_rw_pin(int fd, size_t size) 414 { 415 do_test(fd, size, TEST_TYPE_RW, false); 416 } 417 418 static void test_private_rw_fast_pin(int fd, size_t size) 419 { 420 do_test(fd, size, TEST_TYPE_RW_FAST, false); 421 } 422 423 static void test_private_ro_pin(int fd, size_t size) 424 { 425 do_test(fd, size, TEST_TYPE_RO, false); 426 } 427 428 static void test_private_ro_fast_pin(int fd, size_t size) 429 { 430 do_test(fd, size, TEST_TYPE_RO_FAST, false); 431 } 432 433 #ifdef LOCAL_CONFIG_HAVE_LIBURING 434 static void test_shared_iouring(int fd, size_t size) 435 { 436 do_test(fd, size, TEST_TYPE_IOURING, true); 437 } 438 439 static void test_private_iouring(int fd, size_t size) 440 { 441 do_test(fd, size, TEST_TYPE_IOURING, false); 442 } 443 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 444 445 static const struct test_case test_cases[] = { 446 { 447 "R/W longterm GUP pin in MAP_SHARED file mapping", 448 test_shared_rw_pin, 449 }, 450 { 451 "R/W longterm GUP-fast pin in MAP_SHARED file mapping", 452 test_shared_rw_fast_pin, 453 }, 454 { 455 "R/O longterm GUP pin in MAP_SHARED file mapping", 456 test_shared_ro_pin, 457 }, 458 { 459 "R/O longterm GUP-fast pin in MAP_SHARED file mapping", 460 test_shared_ro_fast_pin, 461 }, 462 { 463 "R/W longterm GUP pin in MAP_PRIVATE file mapping", 464 test_private_rw_pin, 465 }, 466 { 467 "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping", 468 test_private_rw_fast_pin, 469 }, 470 { 471 "R/O longterm GUP pin in MAP_PRIVATE file mapping", 472 test_private_ro_pin, 473 }, 474 { 475 "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping", 476 test_private_ro_fast_pin, 477 }, 478 #ifdef LOCAL_CONFIG_HAVE_LIBURING 479 { 480 "io_uring fixed buffer with MAP_SHARED file mapping", 481 test_shared_iouring, 482 }, 483 { 484 "io_uring fixed buffer with MAP_PRIVATE file mapping", 485 test_private_iouring, 486 }, 487 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 488 }; 489 490 static void run_test_case(struct test_case const *test_case) 491 { 492 int i; 493 494 run_with_memfd(test_case->fn, test_case->desc); 495 run_with_tmpfile(test_case->fn, test_case->desc); 496 run_with_local_tmpfile(test_case->fn, test_case->desc); 497 for (i = 0; i < nr_hugetlbsizes; i++) 498 run_with_memfd_hugetlb(test_case->fn, test_case->desc, 499 hugetlbsizes[i]); 500 } 501 502 static int tests_per_test_case(void) 503 { 504 return 3 + nr_hugetlbsizes; 505 } 506 507 int main(int argc, char **argv) 508 { 509 int i; 510 511 pagesize = getpagesize(); 512 nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 513 ARRAY_SIZE(hugetlbsizes)); 514 515 ksft_print_header(); 516 ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case()); 517 518 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 519 520 for (i = 0; i < ARRAY_SIZE(test_cases); i++) 521 run_test_case(&test_cases[i]); 522 523 ksft_finished(); 524 } 525