1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2023, 2024, Klara Inc.
23 */
24
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdbool.h>
28 #include <sys/param.h>
29 #include <stdlib.h>
30
/*
 * This tests the vdev_disk page alignment check callback
 * vdev_disk_check_alignment_cb(). For now, this test includes a copy of that
 * function from module/os/linux/zfs/vdev_disk.c. If you change it here,
 * remember to change it there too, and add test data here to validate the
 * change you're making.
 */
38
/*
 * Opaque forward declaration. The check callback takes a struct page pointer
 * to match the real callback's signature, but this test only ever passes NULL
 * and the callback never dereferences it.
 */
struct page;

/*
 * In userspace PAGESIZE is spl_pagesize(), which requires linking libspl and
 * would also report the platform's page size. Neither is wanted here: the
 * test data below is written for a fixed 4K page. To keep the check callback
 * textually the same as the real one, PAGESIZE is simply redefined as a
 * constant.
 */
#undef PAGESIZE
#define PAGESIZE (4096)
49
/*
 * State carried across successive calls to vdev_disk_check_alignment_cb()
 * while walking the pages of one buffer. The caller sets blocksize and leaves
 * the flags zeroed; the callback updates the flags as it goes.
 */
typedef struct {
	/* block size that every page segment's offset/length is checked against */
	size_t blocksize;
	/* nonzero once the first page segment has been accepted */
	int seen_first;
	/* nonzero once a segment ended short of a page boundary */
	int seen_last;
} vdev_disk_check_alignment_t;
55
/*
 * Check one page segment of a buffer against the alignment rules below.
 * Intended to be called once per segment, in order, with the same priv.
 *
 *	page	unused; present only to match the callback signature
 *	off	offset of the data within the page
 *	len	number of bytes of data taken from the page
 *	priv	pointer to a vdev_disk_check_alignment_t holding the block
 *		size and the state accumulated from earlier segments
 *
 * Returns 0 if the segment is acceptable, or 1 as soon as a rule is violated.
 *
 * NOTE: this is a copy of the function of the same name in
 * module/os/linux/zfs/vdev_disk.c and must be kept in step with it.
 */
static int
vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len,
    void *priv)
{
	(void) page;
	vdev_disk_check_alignment_t *s = priv;

	/*
	 * The cardinal rule: a single on-disk block must never cross a
	 * physical (order-0) page boundary, as the kernel expects to be able
	 * to split at both LBS and page boundaries.
	 *
	 * This implies various alignment rules for the blocks in this
	 * (possibly compound) page, which we can check for.
	 */

	/*
	 * If the previous page did not end on a page boundary, then we
	 * can't proceed without creating a hole.
	 */
	if (s->seen_last)
		return (1);

	/* This page must contain only whole LBS-sized blocks. */
	if (!IS_P2ALIGNED(len, s->blocksize))
		return (1);

	/*
	 * If this is not the first page in the ABD, then the data must start
	 * on a page-aligned boundary (so the kernel can split on page
	 * boundaries without having to deal with a hole). If it is, then
	 * it can start on LBS-alignment.
	 */
	if (s->seen_first) {
		if (!IS_P2ALIGNED(off, PAGESIZE))
			return (1);
	} else {
		if (!IS_P2ALIGNED(off, s->blocksize))
			return (1);
		/* Only mark the first page as seen once it has been accepted. */
		s->seen_first = 1;
	}

	/*
	 * If this data does not end on a page-aligned boundary, then this
	 * must be the last page in the ABD, for the same reason.
	 */
	s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE);

	return (0);
}
106
/*
 * One test case: a buffer of `size` bytes laid out over a sequence of page
 * segments, to be checked against a given block size.
 *
 * The test tables initialise this struct positionally, so the field order
 * here must not change without updating them.
 */
typedef struct {
	/* test name; a NULL name marks the end of a test table */
	const char *name;

	/* stored block size */
	uint32_t blocksize;

	/* amount of data to take */
	size_t size;

	/*
	 * [start offset in page, len to end of page or size]
	 *
	 * Unused trailing entries are left zeroed; the runner stops at the
	 * first entry whose len is 0.
	 */
	size_t pages[16][2];
} page_test_t;
120
/*
 * Page layouts that the alignment check is expected to accept.
 *
 * Each entry is { name, blocksize, size, { { off, len }, ... } }. The runner
 * takes at most `size` bytes in total, so page entries listed after the data
 * has been fully consumed are never examined. The table ends with a zeroed
 * entry (NULL name).
 */
static const page_test_t valid_tests[] = {
	/* 512B block tests */
	{
		"512B blocks, 4K single page",
		512, 0x1000, {
			{ 0x0, 0x1000 },
		},
	}, {
		"512B blocks, 1K at start of page",
		512, 0x400, {
			{ 0x0, 0x1000 },
		},
	}, {
		"512B blocks, 1K at end of page",
		512, 0x400, {
			{ 0x0c00, 0x0400 },
		},
	}, {
		"512B blocks, 1K within page, 512B start offset",
		512, 0x400, {
			{ 0x0200, 0x0e00 },
		},
	}, {
		"512B blocks, 8K across 2x4K pages",
		512, 0x2000, {
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
		},
	}, {
		"512B blocks, 4K across two pages, 2K start offset",
		512, 0x1000, {
			{ 0x0800, 0x0800 },
			{ 0x0, 0x0800 },
		},
	}, {
		"512B blocks, 16K across 5x4K pages, 512B start offset",
		512, 0x4000, {
			{ 0x0200, 0x0e00 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0200 },
		},
	}, {
		"512B blocks, 64K data, 8x8K compound pages",
		512, 0x10000, {
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
		},
	}, {
		"512B blocks, 64K data, 9x8K compound pages, 512B start offset",
		512, 0x10000, {
			{ 0x0200, 0x1e00 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x0200 },
		},
	}, {
		"512B blocks, 64K data, 2x16K compound pages, 8x4K pages",
		512, 0x10000, {
			{ 0x0, 0x8000 },
			{ 0x0, 0x8000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
		},
	}, {
		"512B blocks, 64K data, mixed 4K/8K/16K pages",
		512, 0x10000, {
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x8000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
		},
	}, {
		"512B blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset",
		512, 0x10000, {
			{ 0x0400, 0x0c00 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x8000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0400 },
		},
	},

	/* 4K block tests */
	{
		"4K blocks, 4K single page",
		4096, 0x1000, {
			{ 0x0, 0x1000 },
		},
	}, {
		"4K blocks, 8K across 2x4K pages",
		4096, 0x2000, {
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
		},
	}, {
		"4K blocks, 64K data, 8x8K compound pages",
		4096, 0x10000, {
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
		},
	}, {
		"4K blocks, 64K data, 2x16K compound pages, 8x4K pages",
		4096, 0x10000, {
			{ 0x0, 0x8000 },
			{ 0x0, 0x8000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
		},
	}, {
		"4K blocks, 64K data, mixed 4K/8K/16K pages",
		4096, 0x10000, {
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x8000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
		},
	},

	/* End-of-table sentinel: NULL name. */
	{ 0 },
};
289
/*
 * Page layouts that the alignment check is expected to reject.
 *
 * Each entry is { name, blocksize, size, { { off, len }, ... } }. The table
 * ends with a zeroed entry (NULL name).
 */
static const page_test_t invalid_tests[] = {
	/*
	 * Gang tests. Composed of lots of smaller allocations, rarely properly
	 * aligned.
	 */
	{
		/*
		 * NOTE(review): the name says 16K, but size is 0x8000 and the
		 * listed pages sum to 0x3e00. The case is rejected at the
		 * second page either way (the first page ends short of a page
		 * boundary), so the result is unaffected -- confirm which
		 * figure was intended.
		 */
		"512B blocks, 16K data, 512 leader (gang block simulation)",
		512, 0x8000, {
			{ 0x0, 0x0200 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0c00 },
		},
	}, {
		"4K blocks, 32K data, 2 incompatible spans "
		"(gang abd simulation)",
		4096, 0x8000, {
			{ 0x0800, 0x0800 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0800 },
			{ 0x0800, 0x0800 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0800 },
		},
	},

	/*
	 * Blocks must not span multiple physical pages. These tests used to
	 * be considered valid, but were since found to be invalid and were
	 * moved here.
	 */
	{
		"4K blocks, 4K across two pages, 2K start offset",
		4096, 0x1000, {
			{ 0x0800, 0x0800 },
			{ 0x0, 0x0800 },
		},
	}, {
		"4K blocks, 16K across 5x4K pages, 512B start offset",
		4096, 0x4000, {
			{ 0x0200, 0x0e00 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0200 },
		},
	}, {
		"4K blocks, 64K data, 9x8K compound pages, 512B start offset",
		4096, 0x10000, {
			{ 0x0200, 0x1e00 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x0200 },
		},
	}, {
		"4K blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset",
		4096, 0x10000, {
			{ 0x0400, 0x0c00 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x2000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x8000 },
			{ 0x0, 0x1000 },
			{ 0x0, 0x0400 },
		},
	},

	/*
	 * This is the very typical case of a 4K block being allocated from
	 * the middle of a mixed-used slab backed by a higher-order compound
	 * page.
	 */
	{
		"4K blocks, 4K data from compound slab, 2K-align offset",
		4096, 0x1000, {
			{ 0x1800, 0x6800 }
		}
	},

	/*
	 * Blocks smaller than LBS should never be possible, but used to be by
	 * accident (see GH#16990). We test for and reject them just to be
	 * sure.
	 */
	{
		"4K blocks, 1K at end of page",
		4096, 0x400, {
			{ 0x0c00, 0x0400 },
		},
	}, {
		"4K blocks, 1K at start of page",
		4096, 0x400, {
			{ 0x0, 0x1000 },
		},
	}, {
		"4K blocks, 1K within page, 512B start offset",
		4096, 0x400, {
			{ 0x0200, 0x0e00 },
		},
	},

	/* End-of-table sentinel: NULL name. */
	{ 0 },
};
406
407 static bool
run_test(const page_test_t * test,bool verbose)408 run_test(const page_test_t *test, bool verbose)
409 {
410 size_t rem = test->size;
411
412 vdev_disk_check_alignment_t s = {
413 .blocksize = test->blocksize,
414 };
415
416 for (int i = 0; test->pages[i][1] > 0; i++) {
417 size_t off = test->pages[i][0];
418 size_t len = test->pages[i][1];
419
420 size_t take = MIN(rem, len);
421
422 if (verbose)
423 printf(" page %d [off %lx len %lx], "
424 "rem %lx, take %lx\n",
425 i, off, len, rem, take);
426
427 if (vdev_disk_check_alignment_cb(NULL, off, take, &s)) {
428 if (verbose)
429 printf(" ABORT: misalignment detected, "
430 "rem %lx\n", rem);
431 return (false);
432 }
433
434 rem -= take;
435 if (rem == 0)
436 break;
437 }
438
439 if (rem > 0) {
440 if (verbose)
441 printf(" ABORT: ran out of pages, rem %lx\n", rem);
442 return (false);
443 }
444
445 return (true);
446 }
447
448 static void
run_test_set(const page_test_t * tests,bool want,int * ntests,int * npassed)449 run_test_set(const page_test_t *tests, bool want, int *ntests, int *npassed)
450 {
451 for (const page_test_t *test = &tests[0]; test->name; test++) {
452 bool pass = (run_test(test, false) == want);
453 if (pass) {
454 printf("%c %s: PASS\n", want ? '+' : '-', test->name);
455 (*npassed)++;
456 } else {
457 printf("%s: FAIL [expected %s, got %s]\n", test->name,
458 want ? "VALID" : "INVALID",
459 want ? "INVALID" : "VALID");
460 run_test(test, true);
461 }
462 (*ntests)++;
463 }
464 }
465
main(void)466 int main(void) {
467 int ntests = 0, npassed = 0;
468
469 run_test_set(valid_tests, true, &ntests, &npassed);
470 run_test_set(invalid_tests, false, &ntests, &npassed);
471
472 printf("\n%d/%d tests passed\n", npassed, ntests);
473
474 return (ntests == npassed ? 0 : 1);
475 }
476