1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * GUP long-term page pinning tests. 4 * 5 * Copyright 2023, Red Hat, Inc. 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 #define _GNU_SOURCE 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <unistd.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <assert.h> 18 #include <sys/mman.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 #include <linux/magic.h> 22 #include <linux/memfd.h> 23 24 #include "local_config.h" 25 #ifdef LOCAL_CONFIG_HAVE_LIBURING 26 #include <liburing.h> 27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28 29 #include "../../../../mm/gup_test.h" 30 #include "../kselftest.h" 31 #include "vm_util.h" 32 33 static size_t pagesize; 34 static int nr_hugetlbsizes; 35 static size_t hugetlbsizes[10]; 36 static int gup_fd; 37 38 static __fsword_t get_fs_type(int fd) 39 { 40 struct statfs fs; 41 int ret; 42 43 do { 44 ret = fstatfs(fd, &fs); 45 } while (ret && errno == EINTR); 46 47 return ret ? 0 : fs.f_type; 48 } 49 50 static bool fs_is_unknown(__fsword_t fs_type) 51 { 52 /* 53 * We only support some filesystems in our tests when dealing with 54 * R/W long-term pinning. For these filesystems, we can be fairly sure 55 * whether they support it or not. 56 */ 57 switch (fs_type) { 58 case TMPFS_MAGIC: 59 case HUGETLBFS_MAGIC: 60 case BTRFS_SUPER_MAGIC: 61 case EXT4_SUPER_MAGIC: 62 case XFS_SUPER_MAGIC: 63 return false; 64 default: 65 return true; 66 } 67 } 68 69 static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type) 70 { 71 assert(!fs_is_unknown(fs_type)); 72 switch (fs_type) { 73 case TMPFS_MAGIC: 74 case HUGETLBFS_MAGIC: 75 return true; 76 default: 77 return false; 78 } 79 } 80 81 enum test_type { 82 TEST_TYPE_RO, 83 TEST_TYPE_RO_FAST, 84 TEST_TYPE_RW, 85 TEST_TYPE_RW_FAST, 86 #ifdef LOCAL_CONFIG_HAVE_LIBURING 87 TEST_TYPE_IOURING, 88 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 89 }; 90 91 static void do_test(int fd, size_t size, enum test_type type, bool shared) 92 { 93 __fsword_t fs_type = get_fs_type(fd); 94 bool should_work; 95 char *mem; 96 int result = KSFT_PASS; 97 int ret; 98 99 if (fd < 0) { 100 result = KSFT_FAIL; 101 goto report; 102 } 103 104 if (ftruncate(fd, size)) { 105 if (errno == ENOENT) { 106 skip_test_dodgy_fs("ftruncate()"); 107 } else { 108 ksft_print_msg("ftruncate() failed (%s)\n", 109 strerror(errno)); 110 result = KSFT_FAIL; 111 goto report; 112 } 113 return; 114 } 115 116 if (fallocate(fd, 0, 0, size)) { 117 if (size == pagesize) { 118 ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); 119 result = KSFT_FAIL; 120 } else { 121 ksft_print_msg("need more free huge pages\n"); 122 result = KSFT_SKIP; 123 } 124 goto report; 125 } 126 127 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, 128 shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); 129 if (mem == MAP_FAILED) { 130 if (size == pagesize || shared) { 131 ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); 132 result = KSFT_FAIL; 133 } else { 134 ksft_print_msg("need more free huge pages\n"); 135 result = KSFT_SKIP; 136 } 137 goto report; 138 } 139 140 /* Fault in the page such that GUP-fast can pin it directly. */ 141 memset(mem, 0, size); 142 143 switch (type) { 144 case TEST_TYPE_RO: 145 case TEST_TYPE_RO_FAST: 146 /* 147 * Cover more cases regarding unsharing decisions when 148 * long-term R/O pinning by mapping the page R/O. 149 */ 150 ret = mprotect(mem, size, PROT_READ); 151 if (ret) { 152 ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); 153 result = KSFT_FAIL; 154 goto munmap; 155 } 156 /* FALLTHROUGH */ 157 case TEST_TYPE_RW: 158 case TEST_TYPE_RW_FAST: { 159 struct pin_longterm_test args; 160 const bool fast = type == TEST_TYPE_RO_FAST || 161 type == TEST_TYPE_RW_FAST; 162 const bool rw = type == TEST_TYPE_RW || 163 type == TEST_TYPE_RW_FAST; 164 165 if (gup_fd < 0) { 166 ksft_print_msg("gup_test not available\n"); 167 result = KSFT_SKIP; 168 break; 169 } 170 171 if (rw && shared && fs_is_unknown(fs_type)) { 172 ksft_print_msg("Unknown filesystem\n"); 173 result = KSFT_SKIP; 174 return; 175 } 176 /* 177 * R/O pinning or pinning in a private mapping is always 178 * expected to work. Otherwise, we expect long-term R/W pinning 179 * to only succeed for special filesystems. 180 */ 181 should_work = !shared || !rw || 182 fs_supports_writable_longterm_pinning(fs_type); 183 184 args.addr = (__u64)(uintptr_t)mem; 185 args.size = size; 186 args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 187 args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; 188 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 189 if (ret && errno == EINVAL) { 190 ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n"); 191 result = KSFT_SKIP; 192 break; 193 } else if (ret && errno == EFAULT) { 194 if (should_work) 195 result = KSFT_FAIL; 196 else 197 result = KSFT_PASS; 198 break; 199 } else if (ret) { 200 ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", 201 strerror(errno)); 202 result = KSFT_FAIL; 203 break; 204 } 205 206 if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) 207 ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n", 208 strerror(errno)); 209 210 /* 211 * TODO: if the kernel ever supports long-term R/W pinning on 212 * some previously unsupported filesystems, we might want to 213 * perform some additional tests for possible data corruptions. 214 */ 215 if (should_work) 216 result = KSFT_PASS; 217 else 218 result = KSFT_FAIL; 219 break; 220 } 221 #ifdef LOCAL_CONFIG_HAVE_LIBURING 222 case TEST_TYPE_IOURING: { 223 struct io_uring ring; 224 struct iovec iov; 225 226 /* io_uring always pins pages writable. */ 227 if (shared && fs_is_unknown(fs_type)) { 228 ksft_print_msg("Unknown filesystem\n"); 229 result = KSFT_SKIP; 230 goto report; 231 } 232 should_work = !shared || 233 fs_supports_writable_longterm_pinning(fs_type); 234 235 /* Skip on errors, as we might just lack kernel support. */ 236 ret = io_uring_queue_init(1, &ring, 0); 237 if (ret < 0) { 238 ksft_print_msg("io_uring_queue_init() failed (%s)\n", 239 strerror(-ret)); 240 result = KSFT_SKIP; 241 break; 242 } 243 /* 244 * Register the range as a fixed buffer. This will FOLL_WRITE | 245 * FOLL_PIN | FOLL_LONGTERM the range. 246 */ 247 iov.iov_base = mem; 248 iov.iov_len = size; 249 ret = io_uring_register_buffers(&ring, &iov, 1); 250 /* Only new kernels return EFAULT. */ 251 if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || 252 errno == EFAULT)) { 253 if (should_work) { 254 ksft_print_msg("Should have failed (%s)\n", 255 strerror(errno)); 256 result = KSFT_FAIL; 257 } else { 258 result = KSFT_PASS; 259 } 260 } else if (ret) { 261 /* 262 * We might just lack support or have insufficient 263 * MEMLOCK limits. 264 */ 265 ksft_print_msg("io_uring_register_buffers() failed (%s)\n", 266 strerror(-ret)); 267 result = KSFT_SKIP; 268 } else { 269 if (should_work) { 270 result = KSFT_PASS; 271 } else { 272 ksft_print_msg("Should have worked\n"); 273 result = KSFT_FAIL; 274 } 275 io_uring_unregister_buffers(&ring); 276 } 277 278 io_uring_queue_exit(&ring); 279 break; 280 } 281 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 282 default: 283 assert(false); 284 } 285 286 munmap: 287 munmap(mem, size); 288 report: 289 log_test_result(result); 290 } 291 292 typedef void (*test_fn)(int fd, size_t size); 293 294 static void run_with_memfd(test_fn fn, const char *desc) 295 { 296 int fd; 297 298 log_test_start("%s ... with memfd", desc); 299 300 fd = memfd_create("test", 0); 301 if (fd < 0) { 302 ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); 303 log_test_result(KSFT_SKIP); 304 return; 305 } 306 307 fn(fd, pagesize); 308 close(fd); 309 } 310 311 static void run_with_tmpfile(test_fn fn, const char *desc) 312 { 313 FILE *file; 314 int fd; 315 316 log_test_start("%s ... with tmpfile", desc); 317 318 file = tmpfile(); 319 if (!file) { 320 ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); 321 fd = -1; 322 } else { 323 fd = fileno(file); 324 if (fd < 0) { 325 ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); 326 } 327 } 328 329 fn(fd, pagesize); 330 331 if (file) 332 fclose(file); 333 } 334 335 static void run_with_local_tmpfile(test_fn fn, const char *desc) 336 { 337 char filename[] = __FILE__"_tmpfile_XXXXXX"; 338 int fd; 339 340 log_test_start("%s ... with local tmpfile", desc); 341 342 fd = mkstemp(filename); 343 if (fd < 0) 344 ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); 345 346 if (unlink(filename)) { 347 ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); 348 close(fd); 349 fd = -1; 350 } 351 352 fn(fd, pagesize); 353 354 if (fd >= 0) 355 close(fd); 356 } 357 358 static void run_with_memfd_hugetlb(test_fn fn, const char *desc, 359 size_t hugetlbsize) 360 { 361 int flags = MFD_HUGETLB; 362 int fd; 363 364 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, 365 hugetlbsize / 1024); 366 367 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 368 369 fd = memfd_create("test", flags); 370 if (fd < 0) { 371 ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); 372 log_test_result(KSFT_SKIP); 373 return; 374 } 375 376 fn(fd, hugetlbsize); 377 close(fd); 378 } 379 380 struct test_case { 381 const char *desc; 382 test_fn fn; 383 }; 384 385 static void test_shared_rw_pin(int fd, size_t size) 386 { 387 do_test(fd, size, TEST_TYPE_RW, true); 388 } 389 390 static void test_shared_rw_fast_pin(int fd, size_t size) 391 { 392 do_test(fd, size, TEST_TYPE_RW_FAST, true); 393 } 394 395 static void test_shared_ro_pin(int fd, size_t size) 396 { 397 do_test(fd, size, TEST_TYPE_RO, true); 398 } 399 400 static void test_shared_ro_fast_pin(int fd, size_t size) 401 { 402 do_test(fd, size, TEST_TYPE_RO_FAST, true); 403 } 404 405 static void test_private_rw_pin(int fd, size_t size) 406 { 407 do_test(fd, size, TEST_TYPE_RW, false); 408 } 409 410 static void test_private_rw_fast_pin(int fd, size_t size) 411 { 412 do_test(fd, size, TEST_TYPE_RW_FAST, false); 413 } 414 415 static void test_private_ro_pin(int fd, size_t size) 416 { 417 do_test(fd, size, TEST_TYPE_RO, false); 418 } 419 420 static void test_private_ro_fast_pin(int fd, size_t size) 421 { 422 do_test(fd, size, TEST_TYPE_RO_FAST, false); 423 } 424 425 #ifdef LOCAL_CONFIG_HAVE_LIBURING 426 static void test_shared_iouring(int fd, size_t size) 427 { 428 do_test(fd, size, TEST_TYPE_IOURING, true); 429 } 430 431 static void test_private_iouring(int fd, size_t size) 432 { 433 do_test(fd, size, TEST_TYPE_IOURING, false); 434 } 435 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 436 437 static const struct test_case test_cases[] = { 438 { 439 "R/W longterm GUP pin in MAP_SHARED file mapping", 440 test_shared_rw_pin, 441 }, 442 { 443 "R/W longterm GUP-fast pin in MAP_SHARED file mapping", 444 test_shared_rw_fast_pin, 445 }, 446 { 447 "R/O longterm GUP pin in MAP_SHARED file mapping", 448 test_shared_ro_pin, 449 }, 450 { 451 "R/O longterm GUP-fast pin in MAP_SHARED file mapping", 452 test_shared_ro_fast_pin, 453 }, 454 { 455 "R/W longterm GUP pin in MAP_PRIVATE file mapping", 456 test_private_rw_pin, 457 }, 458 { 459 "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping", 460 test_private_rw_fast_pin, 461 }, 462 { 463 "R/O longterm GUP pin in MAP_PRIVATE file mapping", 464 test_private_ro_pin, 465 }, 466 { 467 "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping", 468 test_private_ro_fast_pin, 469 }, 470 #ifdef LOCAL_CONFIG_HAVE_LIBURING 471 { 472 "io_uring fixed buffer with MAP_SHARED file mapping", 473 test_shared_iouring, 474 }, 475 { 476 "io_uring fixed buffer with MAP_PRIVATE file mapping", 477 test_private_iouring, 478 }, 479 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 480 }; 481 482 static void run_test_case(struct test_case const *test_case) 483 { 484 int i; 485 486 run_with_memfd(test_case->fn, test_case->desc); 487 run_with_tmpfile(test_case->fn, test_case->desc); 488 run_with_local_tmpfile(test_case->fn, test_case->desc); 489 for (i = 0; i < nr_hugetlbsizes; i++) 490 run_with_memfd_hugetlb(test_case->fn, test_case->desc, 491 hugetlbsizes[i]); 492 } 493 494 static int tests_per_test_case(void) 495 { 496 return 3 + nr_hugetlbsizes; 497 } 498 499 int main(int argc, char **argv) 500 { 501 int i; 502 503 pagesize = getpagesize(); 504 nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 505 ARRAY_SIZE(hugetlbsizes)); 506 507 ksft_print_header(); 508 ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case()); 509 510 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 511 512 for (i = 0; i < ARRAY_SIZE(test_cases); i++) 513 run_test_case(&test_cases[i]); 514 515 ksft_finished(); 516 } 517