/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * The purpose of this test is to ensure that we honor several aspects of our
 * lock ordering. In particular, we want to validate the following starvation
 * properties: blocked writers should take priority over blocked readers, and
 * the controller lock should take priority over the various namespace locks.
 * While we test all kinds of locks, we only use the controller fd so as to
 * simplify the test design.
 *
 * To do this, we utilize our blocking locks. In particular, we take a first
 * lock and then spin up threads that should all block on it. To deal with the
 * inherent race of knowing when a thread is blocked or not, we utilize libproc
 * and wait until the thread has the PR_ASLEEP flag set and is in an ioctl
 * system call. This ensures that the waiting threads are queued on the lock in
 * the order we expect.
 */
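
/*
 * The blocked-thread check itself lives in the nvme_ioctl_util support code.
 * As a sketch of the idea only (and not necessarily how
 * nvme_ioctl_test_thr_blocked() is actually implemented), an equivalent check
 * can be made against the lwpstatus file in /proc, which exposes both
 * PR_ASLEEP and the current system call number. This assumes the 1:1 mapping
 * between thread IDs and LWP IDs:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *	#include <procfs.h>
 *	#include <sys/syscall.h>
 *
 *	static bool
 *	thr_blocked_in_ioctl(thread_t tid)
 *	{
 *		char path[64];
 *		lwpstatus_t st;
 *		int fd;
 *		bool blocked = false;
 *
 *		(void) snprintf(path, sizeof (path),
 *		    "/proc/self/lwp/%u/lwpstatus", tid);
 *		if ((fd = open(path, O_RDONLY)) < 0)
 *			return (false);
 *		if (read(fd, &st, sizeof (st)) == sizeof (st)) {
 *			blocked = (st.pr_flags & PR_ASLEEP) != 0 &&
 *			    st.pr_syscall == SYS_ioctl;
 *		}
 *		(void) close(fd);
 *		return (blocked);
 *	}
 */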

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>
#include <time.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/debug.h>
#include <thread.h>
#include <synch.h>
#include <strings.h>

#include "nvme_ioctl_util.h"

/*
 * Maximum number of threads that we'll spin up for locks.
 */
#define	MAX_LOCKS	10
typedef struct {
	thread_t loi_thread;
	const nvme_ioctl_lock_t *loi_lock;
} lock_order_info_t;

/*
 * The mutex protects everything below it: the recorded lock acquisitions, the
 * index of the next result slot, and the flag that indicates whether it is
 * valid for a thread to have obtained the lock yet.
 */
static mutex_t lock_mutex;
static lock_order_info_t lock_results[MAX_LOCKS];
static uint32_t lock_nextres;
static bool lock_valid;

typedef struct lock_order_test lock_order_test_t;
typedef bool (*lock_order_verif_f)(const lock_order_test_t *, uint32_t);

struct lock_order_test {
	const char *lot_desc;
	const nvme_ioctl_lock_t *lot_initlock;
	const nvme_ioctl_lock_t *lot_locks[MAX_LOCKS];
	lock_order_verif_f lot_verif;
};

static void
lock_verify_dump(void)
{
	for (size_t i = 0; i < lock_nextres; i++) {
		const nvme_ioctl_lock_t *lock = lock_results[i].loi_lock;
		const char *targ = lock->nil_ent == NVME_LOCK_E_CTRL ?
		    "controller" : "namespace";
		const char *level = lock->nil_level == NVME_LOCK_L_READ ?
		    "read" : "write";
		(void) printf("\t[%zu] = { %s, %s }\n", i, targ, level);
	}
}

/*
 * Verify that all of the writers in the test were granted the lock ahead of
 * any of the readers in the test.
 */
static bool
lock_verify_write_before_read(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;
	size_t nwrite = 0;
	size_t nread = 0;

	for (size_t i = 0; i < MAX_LOCKS; i++) {
		if (test->lot_locks[i] == NULL)
			break;
		if (test->lot_locks[i]->nil_level == NVME_LOCK_L_READ) {
			nread++;
		} else {
			nwrite++;
		}
	}
	VERIFY3U(nwrite + nread, ==, nthr);

	mutex_enter(&lock_mutex);
	for (size_t i = 0; i < nthr; i++) {
		nvme_lock_level_t exp_level;
		const char *str;
		const lock_order_info_t *res = &lock_results[i];

		if (nwrite > 0) {
			exp_level = NVME_LOCK_L_WRITE;
			str = "WRITE";
			nwrite--;
		} else {
			exp_level = NVME_LOCK_L_READ;
			str = "READ";
			nread--;
		}

		if (exp_level != res->loi_lock->nil_level) {
			pass = false;
			warnx("TEST FAILED: %s: lock %zu (tid %u, ent %u, "
			    "level %u) was the wrong level, expected level %u "
			    "(%s)", test->lot_desc, i, res->loi_thread,
			    res->loi_lock->nil_ent, res->loi_lock->nil_level,
			    exp_level, str);
		}
	}
	VERIFY3U(nwrite, ==, 0);
	VERIFY3U(nread, ==, 0);

	if (!pass) {
		lock_verify_dump();
	}
	mutex_exit(&lock_mutex);

	return (pass);
}

/*
 * This verifies that all controller-level locks come before any namespace
 * locks. Note, lock_verif_ent_level() combines this with the write before
 * read checks and therefore assumes that we have an ordering that supports
 * both.
 */
static bool
lock_verify_ctrl_before_ns(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;
	size_t nctrl = 0;
	size_t nns = 0;

	for (size_t i = 0; i < MAX_LOCKS; i++) {
		if (test->lot_locks[i] == NULL)
			break;
		if (test->lot_locks[i]->nil_ent == NVME_LOCK_E_CTRL) {
			nctrl++;
		} else {
			nns++;
		}
	}
	VERIFY3U(nctrl + nns, ==, nthr);

	mutex_enter(&lock_mutex);
	for (size_t i = 0; i < nthr; i++) {
		nvme_lock_ent_t exp_ent;
		const char *str;
		const lock_order_info_t *res = &lock_results[i];

		if (nctrl > 0) {
			exp_ent = NVME_LOCK_E_CTRL;
			str = "ctrl";
			nctrl--;
		} else {
			exp_ent = NVME_LOCK_E_NS;
			str = "ns";
			nns--;
		}

		if (exp_ent != res->loi_lock->nil_ent) {
			pass = false;
			warnx("TEST FAILED: %s: lock %zu (tid %u, ent %u, "
			    "level %u) was the wrong entity, expected type %u "
			    "(%s)", test->lot_desc, i, res->loi_thread,
			    res->loi_lock->nil_ent, res->loi_lock->nil_level,
			    exp_ent, str);
		}
	}

	VERIFY3U(nctrl, ==, 0);
	VERIFY3U(nns, ==, 0);

	if (!pass) {
		lock_verify_dump();
	}
	mutex_exit(&lock_mutex);

	return (pass);
}

static bool
lock_verif_ent_level(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;

	if (!lock_verify_ctrl_before_ns(test, nthr))
		pass = false;
	if (!lock_verify_write_before_read(test, nthr))
		pass = false;
	return (pass);
}

/*
 * The descriptions below are fashioned with the starting lock first, followed
 * by the ordering property that we're testing.
 */
static const lock_order_test_t lock_order_tests[] = { {
	.lot_desc = "ns(rd): pending ns writer doesn't allow more ns readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(wr): pending ns writer beats waiting ns reader",
	.lot_initlock = &nvme_test_ns_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): all pend ns writers beat prior pend readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock, &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer doesn't allow more ns readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(wr): pending ctrl writer beats prior pend ns readers",
	.lot_initlock = &nvme_test_ns_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer doesn't allow ctrl readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock,
	    &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer beats pending ns writer "
	    "and readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_ctrl_before_ns,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ns read",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock },
	.lot_verif = lock_verif_ent_level,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ns writer",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verif_ent_level,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ctrl reader",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ctrl(wr): ctrl writer beats all pending readers",
	.lot_initlock = &nvme_test_ctrl_wrlock,
	.lot_locks = { &nvme_test_ctrl_rdlock, &nvme_test_ctrl_rdlock,
	    &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ctrl(wr): ns writer beats all pending ns readers",
	.lot_initlock = &nvme_test_ctrl_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_wrlock, &nvme_test_ns_rdlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
} };
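
/*
 * As a worked example, consider the "all pend ns writers beat prior pend
 * readers" case above: the main thread holds the namespace read lock while
 * the waiters queue up as wr, rd, rd, wr, rd, wr. Because pending writers
 * always take priority over pending readers, the expected grant order once
 * the initial lock is dropped is wr, wr, wr, rd, rd, rd, which is exactly
 * what lock_verify_write_before_read() checks for.
 */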

static void *
lock_thread(void *arg)
{
	const nvme_ioctl_lock_t *tmpl = arg;
	nvme_ioctl_lock_t lock = *tmpl;
	int ctrlfd = nvme_ioctl_test_get_fd(0);
	const char *targ = tmpl->nil_ent == NVME_LOCK_E_CTRL ?
	    "controller" : "namespace";
	const char *level = tmpl->nil_level == NVME_LOCK_L_READ ?
	    "read" : "write";

	/*
	 * Clear the don't-block flag so that this acquisition queues behind
	 * the lock that the main thread already holds.
	 */
	lock.nil_flags &= ~NVME_LOCK_F_DONT_BLOCK;
	nvme_ioctl_test_lock(ctrlfd, &lock);

	/*
	 * Record the order in which we were granted the lock for the
	 * verification functions to check.
	 */
	mutex_enter(&lock_mutex);
	if (!lock_valid) {
		errx(EXIT_FAILURE, "TEST FAILED: thread 0x%x managed to "
		    "return holding the %s %s lock before the main thread "
		    "unlocked: test cannot continue", thr_self(), targ,
		    level);
	}
	VERIFY3U(lock_nextres, <, MAX_LOCKS);
	lock_results[lock_nextres].loi_thread = thr_self();
	lock_results[lock_nextres].loi_lock = tmpl;
	lock_nextres++;
	mutex_exit(&lock_mutex);

	/*
	 * Closing the fd releases our lock, allowing the next waiter in the
	 * queue to proceed.
	 */
	VERIFY0(close(ctrlfd));

	thr_exit(NULL);
}

static bool
lock_order_test(const lock_order_test_t *test)
{
	int ctrlfd;
	uint32_t nthr = 0;
	thread_t thrids[MAX_LOCKS];

	/*
	 * Ensure we hold the test's initial lock ahead of doing anything
	 * else.
	 */
	ctrlfd = nvme_ioctl_test_get_fd(0);
	nvme_ioctl_test_lock(ctrlfd, test->lot_initlock);

	mutex_enter(&lock_mutex);
	(void) memset(&lock_results, 0, sizeof (lock_results));
	lock_nextres = 0;
	lock_valid = false;
	mutex_exit(&lock_mutex);

	for (uint32_t i = 0; i < MAX_LOCKS; i++, nthr++) {
		int err;

		if (test->lot_locks[i] == NULL)
			break;

		err = thr_create(NULL, 0, lock_thread,
		    (void *)test->lot_locks[i], 0, &thrids[i]);
		if (err != 0) {
			errc(EXIT_FAILURE, err, "TEST FAILED: %s: cannot "
			    "continue because we failed to create thread %u",
			    test->lot_desc, i);
		}

		/*
		 * Wait for the thread to block in the lock ioctl before
		 * creating the next one so that the waiters queue up on the
		 * lock in a known order.
		 */
		while (!nvme_ioctl_test_thr_blocked(thrids[i])) {
			struct timespec sleep;

			sleep.tv_sec = 0;
			sleep.tv_nsec = MSEC2NSEC(10);
			(void) nanosleep(&sleep, NULL);
		}
	}

	/*
	 * Now that all threads have been launched, indicate to them that it
	 * is okay to take the lock, and then close our fd to release the
	 * initial lock so that they can run loose while we wait for them.
	 */
	mutex_enter(&lock_mutex);
	lock_valid = true;
	mutex_exit(&lock_mutex);
	VERIFY0(close(ctrlfd));
	for (uint32_t i = 0; i < nthr; i++) {
		int err = thr_join(thrids[i], NULL, NULL);
		if (err != 0) {
			errc(EXIT_FAILURE, err, "TEST FAILED: %s: cannot "
			    "continue because we failed to join thread %u",
			    test->lot_desc, i);
		}
	}
	mutex_enter(&lock_mutex);
	VERIFY3U(lock_nextres, ==, nthr);
	mutex_exit(&lock_mutex);

	if (test->lot_verif(test, nthr)) {
		(void) printf("TEST PASSED: %s\n", test->lot_desc);
		return (true);
	}

	return (false);
}

int
main(void)
{
	int ret = EXIT_SUCCESS;

	VERIFY0(mutex_init(&lock_mutex, USYNC_THREAD | LOCK_ERRORCHECK, NULL));

	for (size_t i = 0; i < ARRAY_SIZE(lock_order_tests); i++) {
		if (!lock_order_test(&lock_order_tests[i])) {
			ret = EXIT_FAILURE;
		}
	}

	VERIFY0(mutex_destroy(&lock_mutex));
	return (ret);
}