/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * The purpose of this test is to ensure that we honor several aspects of our
 * lock ordering. In particular, we want to validate the following
 * anti-starvation properties: blocking writers should take priority over
 * blocking readers, and the controller lock should take priority over the
 * various namespace locks. While we test all kinds of locks here, we only use
 * the controller fd to simplify the test design.
 *
 * To do this, we utilize our blocking locks. In particular, we take a first
 * lock and then spin up threads that should all block on it. To deal with the
 * inherent race of knowing when a thread is blocked or not, we utilize libproc
 * and wait until the thread has the PR_ASLEEP flag set and is in an ioctl
 * system call. This ensures that the waiting threads are queued in the
 * intended order.
 */

#include <err.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <thread.h>
#include <synch.h>
#include <strings.h>

#include "nvme_ioctl_util.h"

/*
 * Maximum number of threads that we'll spin up for locks.
 */
#define	MAX_LOCKS	10
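/*
 * Each worker thread records which lock it was granted, in grant order, in
 * lock_results below (protected by lock_mutex). lock_valid is only set to
 * true once the main thread has dropped the initial lock; a worker that
 * returns from its lock ioctl while lock_valid is still false acquired its
 * lock prematurely and the test must abort.
 */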
typedef struct {
	thread_t loi_thread;
	const nvme_ioctl_lock_t *loi_lock;
} lock_order_info_t;

static mutex_t lock_mutex;
static lock_order_info_t lock_results[MAX_LOCKS];
static uint32_t lock_nextres;
static bool lock_valid;

typedef struct lock_order_test lock_order_test_t;
typedef bool (*lock_order_valif_f)(const lock_order_test_t *, uint32_t);

struct lock_order_test {
	const char *lot_desc;
	const nvme_ioctl_lock_t *lot_initlock;
	const nvme_ioctl_lock_t *lot_locks[MAX_LOCKS];
	lock_order_valif_f lot_verif;
};

static void
lock_verify_dump(void)
{
	for (size_t i = 0; i < lock_nextres; i++) {
		const nvme_ioctl_lock_t *lock = lock_results[i].loi_lock;
		const char *targ = lock->nil_ent == NVME_LOCK_E_CTRL ?
		    "controller" : "namespace";
		const char *level = lock->nil_level == NVME_LOCK_L_READ ?
		    "read" : "write";
		(void) printf("\t[%zu] = { %s, %s }\n", i, targ, level);
	}
}

/*
 * Verify that every writer in the test was granted its lock ahead of any
 * reader in the test.
 */
static bool
lock_verify_write_before_read(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;
	size_t nwrite = 0;
	size_t nread = 0;

	for (size_t i = 0; i < MAX_LOCKS; i++) {
		if (test->lot_locks[i] == NULL)
			break;
		if (test->lot_locks[i]->nil_level == NVME_LOCK_L_READ) {
			nread++;
		} else {
			nwrite++;
		}
	}
	VERIFY3U(nwrite + nread, ==, nthr);

	mutex_enter(&lock_mutex);
	for (size_t i = 0; i < nthr; i++) {
		nvme_lock_level_t exp_level;
		const char *str;
		const lock_order_info_t *res = &lock_results[i];

		if (nwrite > 0) {
			exp_level = NVME_LOCK_L_WRITE;
			str = "WRITE";
			nwrite--;
		} else {
			exp_level = NVME_LOCK_L_READ;
			str = "READ";
			nread--;
		}

		if (exp_level != res->loi_lock->nil_level) {
			pass = false;
			warnx("TEST FAILED: %s: lock %zu (tid %u, ent %u, "
			    "level %u) was the wrong level, expected level %u "
			    "(%s)", test->lot_desc, i, res->loi_thread,
			    res->loi_lock->nil_ent, res->loi_lock->nil_level,
			    exp_level, str);
		}
	}
	VERIFY3U(nwrite, ==, 0);
	VERIFY3U(nread, ==, 0);

	if (!pass) {
		lock_verify_dump();
	}
	mutex_exit(&lock_mutex);

	return (pass);
}

/*
 * Verify that all controller-level locks were granted before any namespace
 * locks. Note, this also calls the write-before-read check and therefore
 * assumes that the test's ordering supports that as well.
 */
static bool
lock_verify_ctrl_before_ns(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;
	size_t nctrl = 0;
	size_t nns = 0;

	for (size_t i = 0; i < MAX_LOCKS; i++) {
		if (test->lot_locks[i] == NULL)
			break;
		if (test->lot_locks[i]->nil_ent == NVME_LOCK_E_CTRL) {
			nctrl++;
		} else {
			nns++;
		}
	}
	VERIFY3U(nctrl + nns, ==, nthr);

	mutex_enter(&lock_mutex);
	for (size_t i = 0; i < nthr; i++) {
		nvme_lock_ent_t exp_ent;
		const char *str;
		const lock_order_info_t *res = &lock_results[i];

		if (nctrl > 0) {
			exp_ent = NVME_LOCK_E_CTRL;
			str = "ctrl";
			nctrl--;
		} else {
			exp_ent = NVME_LOCK_E_NS;
			str = "ns";
			nns--;
		}

		if (exp_ent != res->loi_lock->nil_ent) {
			pass = false;
			warnx("TEST FAILED: %s: lock %zu (tid %u, ent %u, "
			    "level %u) was the wrong entity, expected type %u "
			    "(%s)", test->lot_desc, i, res->loi_thread,
			    res->loi_lock->nil_ent, res->loi_lock->nil_level,
			    exp_ent, str);
		}
	}

	VERIFY3U(nctrl, ==, 0);
	VERIFY3U(nns, ==, 0);

	if (!pass) {
		lock_verify_dump();
	}
	mutex_exit(&lock_mutex);

	return (pass);
}

static bool
lock_verif_ent_level(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;

	if (!lock_verify_ctrl_before_ns(test, nthr))
		pass = false;
	if (!lock_verify_write_before_read(test, nthr))
		pass = false;
	return (pass);
}

/*
 * Each description below names the initial lock that is held, followed by the
 * ordering property being tested.
 */
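/*
 * Each lot_locks array below is NULL-terminated. Threads are created one at a
 * time in array order, and each must be blocked in its lock ioctl before the
 * next is created, which is what lets the verification functions reason about
 * the order in which the pending requests were granted.
 */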
static const lock_order_test_t lock_order_tests[] = { {
	.lot_desc = "ns(rd): pending ns writer doesn't allow more ns readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(wr): pending ns writer beats waiting ns reader",
	.lot_initlock = &nvme_test_ns_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): all pend ns writers beat prior pend readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock, &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer doesn't allow more ns readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(wr): pending ctrl writer beats prior pend ns readers",
	.lot_initlock = &nvme_test_ns_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer doesn't allow ctrl readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock,
	    &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer beats pending ns writer "
	    "and readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_ctrl_before_ns,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ns read",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock },
	.lot_verif = lock_verif_ent_level,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ns writer",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verif_ent_level,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ctrl reader",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ctrl(wr): ctrl writer beats all pending readers",
	.lot_initlock = &nvme_test_ctrl_wrlock,
	.lot_locks = { &nvme_test_ctrl_rdlock, &nvme_test_ctrl_rdlock,
	    &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ctrl(wr): ns writer beats all pending ns readers",
	.lot_initlock = &nvme_test_ctrl_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_wrlock, &nvme_test_ns_rdlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
} };
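/*
 * Worker thread: take a blocking version of the requested lock and, once it
 * has been granted, record our position in the overall grant order.
 */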
static void *
lock_thread(void *arg)
{
	const nvme_ioctl_lock_t *tmpl = arg;
	nvme_ioctl_lock_t lock = *tmpl;
	int ctrlfd = nvme_ioctl_test_get_fd(0);
	const char *targ = tmpl->nil_ent == NVME_LOCK_E_CTRL ?
	    "controller" : "namespace";
	const char *level = tmpl->nil_level == NVME_LOCK_L_READ ?
	    "read" : "write";

	lock.nil_flags &= ~NVME_LOCK_F_DONT_BLOCK;
	nvme_ioctl_test_lock(ctrlfd, &lock);

	mutex_enter(&lock_mutex);
	if (!lock_valid) {
		errx(EXIT_FAILURE, "TEST FAILED: thread 0x%x managed to "
		    "return holding the %s %s lock before the main thread "
		    "unlocked: test cannot continue", thr_self(), targ, level);
	}
	VERIFY3U(lock_nextres, <, MAX_LOCKS);
	lock_results[lock_nextres].loi_thread = thr_self();
	lock_results[lock_nextres].loi_lock = tmpl;
	lock_nextres++;
	mutex_exit(&lock_mutex);

	VERIFY0(close(ctrlfd));

	thr_exit(NULL);
}

/*
 * Run a single lock-order test: take the initial lock, spin up one blocked
 * thread per requested lock, release the initial lock, and then verify the
 * order in which the threads were granted their locks.
 */
static bool
lock_order_test(const lock_order_test_t *test)
{
	int ctrlfd;
	uint32_t nthr = 0;
	thread_t thrids[MAX_LOCKS];

	/*
	 * Take whatever lock we intend to start with ahead of doing anything
	 * else.
	 */
	ctrlfd = nvme_ioctl_test_get_fd(0);
	nvme_ioctl_test_lock(ctrlfd, test->lot_initlock);

	mutex_enter(&lock_mutex);
	(void) memset(&lock_results, 0, sizeof (lock_results));
	lock_nextres = 0;
	lock_valid = false;
	mutex_exit(&lock_mutex);

	for (uint32_t i = 0; i < MAX_LOCKS; i++, nthr++) {
		int err;

		if (test->lot_locks[i] == NULL)
			break;

		err = thr_create(NULL, 0, lock_thread,
		    (void *)test->lot_locks[i], 0, &thrids[i]);
		if (err != 0) {
			errc(EXIT_FAILURE, err, "TEST FAILED: %s: cannot "
			    "continue because we failed to create thread %u",
			    test->lot_desc, i);
		}

		while (!nvme_ioctl_test_thr_blocked(thrids[i])) {
			struct timespec sleep;

			sleep.tv_sec = 0;
			sleep.tv_nsec = MSEC2NSEC(10);
			(void) nanosleep(&sleep, NULL);
		}
	}

	/*
	 * Now that all threads have been launched, indicate to them that it is
	 * now okay to take the lock and then close our fd, releasing the
	 * initial lock and letting them run loose. Then wait for them.
	 */
	mutex_enter(&lock_mutex);
	lock_valid = true;
	mutex_exit(&lock_mutex);
	VERIFY0(close(ctrlfd));
	for (uint32_t i = 0; i < nthr; i++) {
		int err = thr_join(thrids[i], NULL, NULL);
		if (err != 0) {
			errc(EXIT_FAILURE, err, "TEST FAILED: %s: cannot "
			    "continue because we failed to join thread %u",
			    test->lot_desc, i);
		}
	}
	mutex_enter(&lock_mutex);
	VERIFY3U(lock_nextres, ==, nthr);
	mutex_exit(&lock_mutex);

	if (test->lot_verif(test, nthr)) {
		(void) printf("TEST PASSED: %s\n", test->lot_desc);
		return (true);
	}

	return (false);
}

int
main(void)
{
	int ret = EXIT_SUCCESS;

	VERIFY0(mutex_init(&lock_mutex, USYNC_THREAD | LOCK_ERRORCHECK, NULL));

	for (size_t i = 0; i < ARRAY_SIZE(lock_order_tests); i++) {
		if (!lock_order_test(&lock_order_tests[i])) {
			ret = EXIT_FAILURE;
		}
	}

	VERIFY0(mutex_destroy(&lock_mutex));
	return (ret);
}