1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2023, 2024, Klara Inc.
24 */
25
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdbool.h>
29 #include <sys/param.h>
30 #include <stdlib.h>
31
/*
 * This tests the vdev_disk page alignment check callback
 * vdev_disk_check_alignment_cb(). For now, this test includes a copy of that
 * function from module/os/linux/zfs/vdev_disk.c. If you change it here,
 * remember to change it there too, and add test data here to validate the
 * change you're making.
 */
39
/*
 * Only ever handled by pointer in this file, so an incomplete declaration
 * is all that is needed.
 */
struct page;

/*
 * In userspace PAGESIZE is spl_pagesize(). Using that would mean linking
 * libspl, and it would also report the page size of whatever platform the
 * test runs on, which is not what a test wants. Pinning our own value lets
 * the check callback stay the same as the real one.
 */
#ifdef PAGESIZE
#undef PAGESIZE
#endif
#define	PAGESIZE	(4096)
50
/*
 * State carried between successive calls to vdev_disk_check_alignment_cb()
 * while it walks the pages of one buffer.
 */
typedef struct vdev_disk_check_alignment {
	/* block size that lengths and the first offset are checked against */
	size_t	blocksize;

	/* nonzero once the first page has passed its offset check */
	int	seen_first;

	/* nonzero once a page's data stopped short of a page boundary */
	int	seen_last;
} vdev_disk_check_alignment_t;
56
57 static int
vdev_disk_check_alignment_cb(struct page * page,size_t off,size_t len,void * priv)58 vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len,
59 void *priv)
60 {
61 (void) page;
62 vdev_disk_check_alignment_t *s = priv;
63
64 /*
65 * The cardinal rule: a single on-disk block must never cross an
66 * physical (order-0) page boundary, as the kernel expects to be able
67 * to split at both LBS and page boundaries.
68 *
69 * This implies various alignment rules for the blocks in this
70 * (possibly compound) page, which we can check for.
71 */
72
73 /*
74 * If the previous page did not end on a page boundary, then we
75 * can't proceed without creating a hole.
76 */
77 if (s->seen_last)
78 return (1);
79
80 /* This page must contain only whole LBS-sized blocks. */
81 if (!IS_P2ALIGNED(len, s->blocksize))
82 return (1);
83
84 /*
85 * If this is not the first page in the ABD, then the data must start
86 * on a page-aligned boundary (so the kernel can split on page
87 * boundaries without having to deal with a hole). If it is, then
88 * it can start on LBS-alignment.
89 */
90 if (s->seen_first) {
91 if (!IS_P2ALIGNED(off, PAGESIZE))
92 return (1);
93 } else {
94 if (!IS_P2ALIGNED(off, s->blocksize))
95 return (1);
96 s->seen_first = 1;
97 }
98
99 /*
100 * If this data does not end on a page-aligned boundary, then this
101 * must be the last page in the ABD, for the same reason.
102 */
103 s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE);
104
105 return (0);
106 }
107
/*
 * A single test case: a block size, an amount of data, and the list of
 * pages that the data is spread over.
 */
typedef struct page_test {
	/* test name; a NULL name marks the end of a test table */
	const char	*name;

	/* stored block size */
	uint32_t	blocksize;

	/* amount of data to take */
	size_t		size;

	/*
	 * One entry per page:
	 * [start offset in page, len to end of page or size].
	 * A zero len marks the end of the list.
	 */
	size_t		pages[16][2];
} page_test_t;
121
122 static const page_test_t valid_tests[] = {
123 /* 512B block tests */
124 {
125 "512B blocks, 4K single page",
126 512, 0x1000, {
127 { 0x0, 0x1000 },
128 },
129 }, {
130 "512B blocks, 1K at start of page",
131 512, 0x400, {
132 { 0x0, 0x1000 },
133 },
134 }, {
135 "512B blocks, 1K at end of page",
136 512, 0x400, {
137 { 0x0c00, 0x0400 },
138 },
139 }, {
140 "512B blocks, 1K within page, 512B start offset",
141 512, 0x400, {
142 { 0x0200, 0x0e00 },
143 },
144 }, {
145 "512B blocks, 8K across 2x4K pages",
146 512, 0x2000, {
147 { 0x0, 0x1000 },
148 { 0x0, 0x1000 },
149 },
150 }, {
151 "512B blocks, 4K across two pages, 2K start offset",
152 512, 0x1000, {
153 { 0x0800, 0x0800 },
154 { 0x0, 0x0800 },
155 },
156 }, {
157 "512B blocks, 16K across 5x4K pages, 512B start offset",
158 512, 0x4000, {
159 { 0x0200, 0x0e00 },
160 { 0x0, 0x1000 },
161 { 0x0, 0x1000 },
162 { 0x0, 0x1000 },
163 { 0x0, 0x0200 },
164 },
165 }, {
166 "512B blocks, 64K data, 8x8K compound pages",
167 512, 0x10000, {
168 { 0x0, 0x2000 },
169 { 0x0, 0x2000 },
170 { 0x0, 0x2000 },
171 { 0x0, 0x2000 },
172 { 0x0, 0x2000 },
173 { 0x0, 0x2000 },
174 { 0x0, 0x2000 },
175 { 0x0, 0x2000 },
176 },
177 }, {
178 "512B blocks, 64K data, 9x8K compound pages, 512B start offset",
179 512, 0x10000, {
180 { 0x0200, 0x1e00 },
181 { 0x0, 0x2000 },
182 { 0x0, 0x2000 },
183 { 0x0, 0x2000 },
184 { 0x0, 0x2000 },
185 { 0x0, 0x2000 },
186 { 0x0, 0x2000 },
187 { 0x0, 0x2000 },
188 { 0x0, 0x0200 },
189 },
190 }, {
191 "512B blocks, 64K data, 2x16K compound pages, 8x4K pages",
192 512, 0x10000, {
193 { 0x0, 0x8000 },
194 { 0x0, 0x8000 },
195 { 0x0, 0x1000 },
196 { 0x0, 0x1000 },
197 { 0x0, 0x1000 },
198 { 0x0, 0x1000 },
199 { 0x0, 0x1000 },
200 { 0x0, 0x1000 },
201 { 0x0, 0x1000 },
202 { 0x0, 0x1000 },
203 },
204 }, {
205 "512B blocks, 64K data, mixed 4K/8K/16K pages",
206 512, 0x10000, {
207 { 0x0, 0x1000 },
208 { 0x0, 0x2000 },
209 { 0x0, 0x1000 },
210 { 0x0, 0x8000 },
211 { 0x0, 0x1000 },
212 { 0x0, 0x1000 },
213 { 0x0, 0x2000 },
214 { 0x0, 0x1000 },
215 { 0x0, 0x1000 },
216 { 0x0, 0x2000 },
217 },
218 }, {
219 "512B blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset",
220 512, 0x10000, {
221 { 0x0400, 0x0c00 },
222 { 0x0, 0x1000 },
223 { 0x0, 0x1000 },
224 { 0x0, 0x1000 },
225 { 0x0, 0x2000 },
226 { 0x0, 0x2000 },
227 { 0x0, 0x1000 },
228 { 0x0, 0x8000 },
229 { 0x0, 0x1000 },
230 { 0x0, 0x0400 },
231 },
232 },
233
234 /* 4K block tests */
235 {
236 "4K blocks, 4K single page",
237 4096, 0x1000, {
238 { 0x0, 0x1000 },
239 },
240 }, {
241 "4K blocks, 8K across 2x4K pages",
242 4096, 0x2000, {
243 { 0x0, 0x1000 },
244 { 0x0, 0x1000 },
245 },
246 }, {
247 "4K blocks, 64K data, 8x8K compound pages",
248 4096, 0x10000, {
249 { 0x0, 0x2000 },
250 { 0x0, 0x2000 },
251 { 0x0, 0x2000 },
252 { 0x0, 0x2000 },
253 { 0x0, 0x2000 },
254 { 0x0, 0x2000 },
255 { 0x0, 0x2000 },
256 { 0x0, 0x2000 },
257 },
258 }, {
259 "4K blocks, 64K data, 2x16K compound pages, 8x4K pages",
260 4096, 0x10000, {
261 { 0x0, 0x8000 },
262 { 0x0, 0x8000 },
263 { 0x0, 0x1000 },
264 { 0x0, 0x1000 },
265 { 0x0, 0x1000 },
266 { 0x0, 0x1000 },
267 { 0x0, 0x1000 },
268 { 0x0, 0x1000 },
269 { 0x0, 0x1000 },
270 { 0x0, 0x1000 },
271 },
272 }, {
273 "4K blocks, 64K data, mixed 4K/8K/16K pages",
274 4096, 0x10000, {
275 { 0x0, 0x1000 },
276 { 0x0, 0x2000 },
277 { 0x0, 0x1000 },
278 { 0x0, 0x8000 },
279 { 0x0, 0x1000 },
280 { 0x0, 0x1000 },
281 { 0x0, 0x2000 },
282 { 0x0, 0x1000 },
283 { 0x0, 0x1000 },
284 { 0x0, 0x2000 },
285 },
286 },
287
288 { 0 },
289 };
290
291 static const page_test_t invalid_tests[] = {
292 /*
293 * Gang tests. Composed of lots of smaller allocations, rarely properly
294 * aligned.
295 */
296 {
297 "512B blocks, 16K data, 512 leader (gang block simulation)",
298 512, 0x8000, {
299 { 0x0, 0x0200 },
300 { 0x0, 0x1000 },
301 { 0x0, 0x1000 },
302 { 0x0, 0x1000 },
303 { 0x0, 0x0c00 },
304 },
305 }, {
306 "4K blocks, 32K data, 2 incompatible spans "
307 "(gang abd simulation)",
308 4096, 0x8000, {
309 { 0x0800, 0x0800 },
310 { 0x0, 0x1000 },
311 { 0x0, 0x1000 },
312 { 0x0, 0x1000 },
313 { 0x0, 0x0800 },
314 { 0x0800, 0x0800 },
315 { 0x0, 0x1000 },
316 { 0x0, 0x1000 },
317 { 0x0, 0x1000 },
318 { 0x0, 0x0800 },
319 },
320 },
321
322 /*
323 * Blocks must not span multiple physical pages. These tests used to
324 * be considered valid, but were since found to be invalid and were
325 * moved here.
326 */
327 {
328 "4K blocks, 4K across two pages, 2K start offset",
329 4096, 0x1000, {
330 { 0x0800, 0x0800 },
331 { 0x0, 0x0800 },
332 },
333 }, {
334 "4K blocks, 16K across 5x4K pages, 512B start offset",
335 4096, 0x4000, {
336 { 0x0200, 0x0e00 },
337 { 0x0, 0x1000 },
338 { 0x0, 0x1000 },
339 { 0x0, 0x1000 },
340 { 0x0, 0x0200 },
341 },
342 }, {
343 "4K blocks, 64K data, 9x8K compound pages, 512B start offset",
344 4096, 0x10000, {
345 { 0x0200, 0x1e00 },
346 { 0x0, 0x2000 },
347 { 0x0, 0x2000 },
348 { 0x0, 0x2000 },
349 { 0x0, 0x2000 },
350 { 0x0, 0x2000 },
351 { 0x0, 0x2000 },
352 { 0x0, 0x2000 },
353 { 0x0, 0x0200 },
354 },
355 }, {
356 "4K blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset",
357 4096, 0x10000, {
358 { 0x0400, 0x0c00 },
359 { 0x0, 0x1000 },
360 { 0x0, 0x1000 },
361 { 0x0, 0x1000 },
362 { 0x0, 0x2000 },
363 { 0x0, 0x2000 },
364 { 0x0, 0x1000 },
365 { 0x0, 0x8000 },
366 { 0x0, 0x1000 },
367 { 0x0, 0x0400 },
368 },
369 },
370
371 /*
372 * This is the very typical case of a 4K block being allocated from
373 * the middle of a mixed-used slab backed by a higher-order compound
374 * page.
375 */
376 {
377 "4K blocks, 4K data from compound slab, 2K-align offset",
378 4096, 0x1000, {
379 { 0x1800, 0x6800 }
380 }
381 },
382
383 /*
384 * Blocks smaller than LBS should never be possible, but used to be by
385 * accident (see GH#16990). We test for and reject them just to be
386 * sure.
387 */
388 {
389 "4K blocks, 1K at end of page",
390 4096, 0x400, {
391 { 0x0c00, 0x0400 },
392 },
393 }, {
394 "4K blocks, 1K at start of page",
395 4096, 0x400, {
396 { 0x0, 0x1000 },
397 },
398 }, {
399 "4K blocks, 1K within page, 512B start offset",
400 4096, 0x400, {
401 { 0x0200, 0x0e00 },
402 },
403 },
404
405 { 0 },
406 };
407
408 static bool
run_test(const page_test_t * test,bool verbose)409 run_test(const page_test_t *test, bool verbose)
410 {
411 size_t rem = test->size;
412
413 vdev_disk_check_alignment_t s = {
414 .blocksize = test->blocksize,
415 };
416
417 for (int i = 0; test->pages[i][1] > 0; i++) {
418 size_t off = test->pages[i][0];
419 size_t len = test->pages[i][1];
420
421 size_t take = MIN(rem, len);
422
423 if (verbose)
424 printf(" page %d [off %zx len %zx], "
425 "rem %zx, take %zx\n",
426 i, off, len, rem, take);
427
428 if (vdev_disk_check_alignment_cb(NULL, off, take, &s)) {
429 if (verbose)
430 printf(" ABORT: misalignment detected, "
431 "rem %zx\n", rem);
432 return (false);
433 }
434
435 rem -= take;
436 if (rem == 0)
437 break;
438 }
439
440 if (rem > 0) {
441 if (verbose)
442 printf(" ABORT: ran out of pages, rem %zx\n", rem);
443 return (false);
444 }
445
446 return (true);
447 }
448
449 static void
run_test_set(const page_test_t * tests,bool want,int * ntests,int * npassed)450 run_test_set(const page_test_t *tests, bool want, int *ntests, int *npassed)
451 {
452 for (const page_test_t *test = &tests[0]; test->name; test++) {
453 bool pass = (run_test(test, false) == want);
454 if (pass) {
455 printf("%c %s: PASS\n", want ? '+' : '-', test->name);
456 (*npassed)++;
457 } else {
458 printf("%s: FAIL [expected %s, got %s]\n", test->name,
459 want ? "VALID" : "INVALID",
460 want ? "INVALID" : "VALID");
461 run_test(test, true);
462 }
463 (*ntests)++;
464 }
465 }
466
main(void)467 int main(void) {
468 int ntests = 0, npassed = 0;
469
470 run_test_set(valid_tests, true, &ntests, &npassed);
471 run_test_set(invalid_tests, false, &ntests, &npassed);
472
473 printf("\n%d/%d tests passed\n", npassed, ntests);
474
475 return (ntests == npassed ? 0 : 1);
476 }
477