1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2001 Sistina Software (UK) Limited.
4 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
5 *
6 * This file is released under the GPL.
7 */
8
9 #include "dm-core.h"
10 #include "dm-rq.h"
11
12 #include <linux/module.h>
13 #include <linux/vmalloc.h>
14 #include <linux/blkdev.h>
15 #include <linux/blk-integrity.h>
16 #include <linux/namei.h>
17 #include <linux/ctype.h>
18 #include <linux/string.h>
19 #include <linux/slab.h>
20 #include <linux/interrupt.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/atomic.h>
24 #include <linux/blk-mq.h>
25 #include <linux/mount.h>
26 #include <linux/dax.h>
27
28 #define DM_MSG_PREFIX "table"
29
30 #define NODE_SIZE L1_CACHE_BYTES
31 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
32 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
33
34 /*
35 * Similar to ceiling(log_size(n))
36 */
37 static unsigned int int_log(unsigned int n, unsigned int base)
38 {
39 int result = 0;
40
41 while (n > 1) {
42 n = dm_div_up(n, base);
43 result++;
44 }
45
46 return result;
47 }
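
/*
 * Illustrative values (not from the driver): with a 64-byte cache line and
 * an 8-byte sector_t, KEYS_PER_NODE = 8 and CHILDREN_PER_NODE = 9, so a
 * table needing 100 leaf nodes takes int_log(100, 9) = 3 extra levels,
 * since 100 -> 12 -> 2 -> 1 under repeated dm_div_up().
 */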
48
49 /*
50 * Calculate the index of the child node of the n'th node k'th key.
51 */
52 static inline unsigned int get_child(unsigned int n, unsigned int k)
53 {
54 return (n * CHILDREN_PER_NODE) + k;
55 }
56
57 /*
58 * Return the n'th node of level l from table t.
59 */
60 static inline sector_t *get_node(struct dm_table *t,
61 unsigned int l, unsigned int n)
62 {
63 return t->index[l] + (n * KEYS_PER_NODE);
64 }
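
/*
 * Index arithmetic sketch (hypothetical numbers): with CHILDREN_PER_NODE = 9,
 * key k = 2 of node n = 3 points at child node get_child(3, 2) = 29 on the
 * next level, whose keys live at t->index[l + 1] + 29 * KEYS_PER_NODE.
 */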
65
66 /*
67 * Return the highest key that you could lookup from the n'th
68 * node on level l of the btree.
69 */
70 static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
71 {
72 for (; l < t->depth - 1; l++)
73 n = get_child(n, CHILDREN_PER_NODE - 1);
74
75 if (n >= t->counts[l])
76 return (sector_t) -1;
77
78 return get_node(t, l, n)[KEYS_PER_NODE - 1];
79 }
80
81 /*
82 * Fills in a level of the btree based on the highs of the level
83 * below it.
84 */
85 static int setup_btree_index(unsigned int l, struct dm_table *t)
86 {
87 unsigned int n, k;
88 sector_t *node;
89
90 for (n = 0U; n < t->counts[l]; n++) {
91 node = get_node(t, l, n);
92
93 for (k = 0U; k < KEYS_PER_NODE; k++)
94 node[k] = high(t, l + 1, get_child(n, k));
95 }
96
97 return 0;
98 }
99
100 /*
101 * highs and targets are managed as dynamic arrays during a
102 * table load.
103 */
104 static int alloc_targets(struct dm_table *t, unsigned int num)
105 {
106 sector_t *n_highs;
107 struct dm_target *n_targets;
108
109 /*
110 * Allocate both the target array and offset array at once.
111 */
112 n_highs = kvcalloc(num, sizeof(struct dm_target) + sizeof(sector_t),
113 GFP_KERNEL);
114 if (!n_highs)
115 return -ENOMEM;
116
117 n_targets = (struct dm_target *) (n_highs + num);
118
119 memset(n_highs, -1, sizeof(*n_highs) * num);
120
121 t->num_allocated = num;
122 t->highs = n_highs;
123 t->targets = n_targets;
124
125 return 0;
126 }
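
/*
 * Layout of the combined allocation above: the num sector_t "highs" entries
 * are followed in the same buffer by num struct dm_target entries, so the
 * single kvfree(t->highs) in dm_table_destroy() releases both arrays.
 *
 *   [ high[0] ... high[num-1] ][ target[0] ... target[num-1] ]
 *     ^ t->highs                 ^ t->targets
 */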
127
128 int dm_table_create(struct dm_table **result, blk_mode_t mode,
129 unsigned int num_targets, struct mapped_device *md)
130 {
131 struct dm_table *t;
132
133 if (num_targets > DM_MAX_TARGETS)
134 return -EOVERFLOW;
135
136 t = kzalloc(sizeof(*t), GFP_KERNEL);
137
138 if (!t)
139 return -ENOMEM;
140
141 INIT_LIST_HEAD(&t->devices);
142 init_rwsem(&t->devices_lock);
143
144 if (!num_targets)
145 num_targets = KEYS_PER_NODE;
146
147 num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
148
149 if (!num_targets) {
150 kfree(t);
151 return -EOVERFLOW;
152 }
153
154 if (alloc_targets(t, num_targets)) {
155 kfree(t);
156 return -ENOMEM;
157 }
158
159 t->type = DM_TYPE_NONE;
160 t->mode = mode;
161 t->md = md;
162 t->flush_bypasses_map = true;
163 *result = t;
164 return 0;
165 }
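
/*
 * Minimal usage sketch (hedged; error handling trimmed, "md" is a
 * hypothetical mapped_device owned by the caller):
 *
 *	struct dm_table *t;
 *
 *	if (!dm_table_create(&t, BLK_OPEN_READ | BLK_OPEN_WRITE, 1, md)) {
 *		// populate with dm_table_add_target(), then call
 *		// dm_table_complete(t) before binding the table.
 *	}
 */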
166
167 static void free_devices(struct list_head *devices, struct mapped_device *md)
168 {
169 struct list_head *tmp, *next;
170
171 list_for_each_safe(tmp, next, devices) {
172 struct dm_dev_internal *dd =
173 list_entry(tmp, struct dm_dev_internal, list);
174 DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
175 dm_device_name(md), dd->dm_dev->name);
176 dm_put_table_device(md, dd->dm_dev);
177 kfree(dd);
178 }
179 }
180
181 static void dm_table_destroy_crypto_profile(struct dm_table *t);
182
183 void dm_table_destroy(struct dm_table *t)
184 {
185 if (!t)
186 return;
187
188 /* free the indexes */
189 if (t->depth >= 2)
190 kvfree(t->index[t->depth - 2]);
191
192 /* free the targets */
193 for (unsigned int i = 0; i < t->num_targets; i++) {
194 struct dm_target *ti = dm_table_get_target(t, i);
195
196 if (ti->type->dtr)
197 ti->type->dtr(ti);
198
199 dm_put_target_type(ti->type);
200 }
201
202 kvfree(t->highs);
203
204 /* free the device list */
205 free_devices(&t->devices, t->md);
206
207 dm_free_md_mempools(t->mempools);
208
209 dm_table_destroy_crypto_profile(t);
210
211 kfree(t);
212 }
213
214 /*
215 * See if we've already got a device in the list.
216 */
217 static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
218 {
219 struct dm_dev_internal *dd;
220
221 list_for_each_entry(dd, l, list)
222 if (dd->dm_dev->bdev->bd_dev == dev)
223 return dd;
224
225 return NULL;
226 }
227
228 /*
229 * If possible, this checks whether an area of a destination device is invalid.
230 */
231 static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
232 sector_t start, sector_t len, void *data)
233 {
234 struct queue_limits *limits = data;
235 struct block_device *bdev = dev->bdev;
236 sector_t dev_size = bdev_nr_sectors(bdev);
237 unsigned short logical_block_size_sectors =
238 limits->logical_block_size >> SECTOR_SHIFT;
239
240 if (!dev_size)
241 return 0;
242
243 if ((start >= dev_size) || (start + len > dev_size)) {
244 DMERR("%s: %pg too small for target: start=%llu, len=%llu, dev_size=%llu",
245 dm_device_name(ti->table->md), bdev,
246 (unsigned long long)start,
247 (unsigned long long)len,
248 (unsigned long long)dev_size);
249 return 1;
250 }
251
252 /*
253 * If the target is mapped to zoned block device(s), check
254 * that the zones are not partially mapped.
255 */
256 if (bdev_is_zoned(bdev)) {
257 unsigned int zone_sectors = bdev_zone_sectors(bdev);
258
259 if (!bdev_is_zone_aligned(bdev, start)) {
260 DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg",
261 dm_device_name(ti->table->md),
262 (unsigned long long)start,
263 zone_sectors, bdev);
264 return 1;
265 }
266
267 /*
268 * Note: The last zone of a zoned block device may be smaller
269 * than other zones. So for a target mapping the end of a
270 * zoned block device with such a zone, len would not be zone
271 * aligned. We do not allow such last smaller zone to be part
272 * of the mapping here to ensure that mappings with multiple
273 * devices do not end up with a smaller zone in the middle of
274 * the sector range.
275 */
276 if (!bdev_is_zone_aligned(bdev, len)) {
277 DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg",
278 dm_device_name(ti->table->md),
279 (unsigned long long)len,
280 zone_sectors, bdev);
281 return 1;
282 }
283 }
284
285 if (logical_block_size_sectors <= 1)
286 return 0;
287
288 if (start & (logical_block_size_sectors - 1)) {
289 DMERR("%s: start=%llu not aligned to h/w logical block size %u of %pg",
290 dm_device_name(ti->table->md),
291 (unsigned long long)start,
292 limits->logical_block_size, bdev);
293 return 1;
294 }
295
296 if (len & (logical_block_size_sectors - 1)) {
297 DMERR("%s: len=%llu not aligned to h/w logical block size %u of %pg",
298 dm_device_name(ti->table->md),
299 (unsigned long long)len,
300 limits->logical_block_size, bdev);
301 return 1;
302 }
303
304 return 0;
305 }
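
/*
 * Numeric sketch (hypothetical device): with 4096-byte logical blocks,
 * logical_block_size_sectors == 8, so start = 12 is rejected above because
 * 12 & 7 == 4, while start = 16 with len = 24 passes both mask checks.
 */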
306
307 /*
308 * This upgrades the mode on an already open dm_dev, being
309 * careful to leave things as they were if we fail to reopen the
310 * device and not to touch the existing bdev field in case
311 * it is accessed concurrently.
312 */
313 static int upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode,
314 struct mapped_device *md)
315 {
316 int r;
317 struct dm_dev *old_dev, *new_dev;
318
319 old_dev = dd->dm_dev;
320
321 r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
322 dd->dm_dev->mode | new_mode, &new_dev);
323 if (r)
324 return r;
325
326 dd->dm_dev = new_dev;
327 dm_put_table_device(md, old_dev);
328
329 return 0;
330 }
331
332 /*
333 * Note: the __ref annotation is because this function can call the __init
334 * marked early_lookup_bdev when called during early boot code from dm-init.c.
335 */
336 int __ref dm_devt_from_path(const char *path, dev_t *dev_p)
337 {
338 int r;
339 dev_t dev;
340 unsigned int major, minor;
341 char dummy;
342
343 if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
344 /* Extract the major/minor numbers */
345 dev = MKDEV(major, minor);
346 if (MAJOR(dev) != major || MINOR(dev) != minor)
347 return -EOVERFLOW;
348 } else {
349 r = lookup_bdev(path, &dev);
350 #ifndef MODULE
351 if (r && system_state < SYSTEM_RUNNING)
352 r = early_lookup_bdev(path, &dev);
353 #endif
354 if (r)
355 return r;
356 }
357 *dev_p = dev;
358 return 0;
359 }
360 EXPORT_SYMBOL(dm_devt_from_path);
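
/*
 * Illustrative call (device numbers are hypothetical): "8:16" and a path
 * such as "/dev/sdb" naming the same device resolve to the same dev_t:
 *
 *	dev_t dev;
 *
 *	if (!dm_devt_from_path("8:16", &dev))
 *		pr_info("major %u minor %u\n", MAJOR(dev), MINOR(dev));
 */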
361
362 /*
363 * Add a device to the list, or just increment the usage count if
364 * it's already present.
365 */
366 int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
367 struct dm_dev **result)
368 {
369 int r;
370 dev_t dev;
371 struct dm_dev_internal *dd;
372 struct dm_table *t = ti->table;
373
374 BUG_ON(!t);
375
376 r = dm_devt_from_path(path, &dev);
377 if (r)
378 return r;
379
380 if (dev == disk_devt(t->md->disk))
381 return -EINVAL;
382
383 down_write(&t->devices_lock);
384
385 dd = find_device(&t->devices, dev);
386 if (!dd) {
387 dd = kmalloc(sizeof(*dd), GFP_KERNEL);
388 if (!dd) {
389 r = -ENOMEM;
390 goto unlock_ret_r;
391 }
392
393 r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev);
394 if (r) {
395 kfree(dd);
396 goto unlock_ret_r;
397 }
398
399 refcount_set(&dd->count, 1);
400 list_add(&dd->list, &t->devices);
401 goto out;
402
403 } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
404 r = upgrade_mode(dd, mode, t->md);
405 if (r)
406 goto unlock_ret_r;
407 }
408 refcount_inc(&dd->count);
409 out:
410 up_write(&t->devices_lock);
411 *result = dd->dm_dev;
412 return 0;
413
414 unlock_ret_r:
415 up_write(&t->devices_lock);
416 return r;
417 }
418 EXPORT_SYMBOL(dm_get_device);
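
/*
 * Typical use from a target constructor (hedged sketch; "lc" is a
 * hypothetical per-target context stored in ti->private):
 *
 *	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev);
 *	if (r) {
 *		ti->error = "Device lookup failed";
 *		return r;
 *	}
 *	// ...with a matching dm_put_device(ti, lc->dev) in the destructor.
 */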
419
420 static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
421 sector_t start, sector_t len, void *data)
422 {
423 struct queue_limits *limits = data;
424 struct block_device *bdev = dev->bdev;
425 struct request_queue *q = bdev_get_queue(bdev);
426
427 if (unlikely(!q)) {
428 DMWARN("%s: Cannot set limits for nonexistent device %pg",
429 dm_device_name(ti->table->md), bdev);
430 return 0;
431 }
432
433 mutex_lock(&q->limits_lock);
434 /*
435 * BLK_FEAT_ATOMIC_WRITES is not inherited from the bottom device in
436 * blk_stack_limits(), so do it manually.
437 */
438 limits->features |= (q->limits.features & BLK_FEAT_ATOMIC_WRITES);
439
440 if (blk_stack_limits(limits, &q->limits,
441 get_start_sect(bdev) + start) < 0)
442 DMWARN("%s: adding target device %pg caused an alignment inconsistency: "
443 "physical_block_size=%u, logical_block_size=%u, "
444 "alignment_offset=%u, start=%llu",
445 dm_device_name(ti->table->md), bdev,
446 q->limits.physical_block_size,
447 q->limits.logical_block_size,
448 q->limits.alignment_offset,
449 (unsigned long long) start << SECTOR_SHIFT);
450
451 /*
452 * Only stack the integrity profile if the target doesn't have native
453 * integrity support.
454 */
455 if (!dm_target_has_integrity(ti->type))
456 queue_limits_stack_integrity_bdev(limits, bdev);
457 mutex_unlock(&q->limits_lock);
458 return 0;
459 }
460
461 /*
462 * Decrement a device's use count and remove it if necessary.
463 */
464 void dm_put_device(struct dm_target *ti, struct dm_dev *d)
465 {
466 int found = 0;
467 struct dm_table *t = ti->table;
468 struct list_head *devices = &t->devices;
469 struct dm_dev_internal *dd;
470
471 down_write(&t->devices_lock);
472
473 list_for_each_entry(dd, devices, list) {
474 if (dd->dm_dev == d) {
475 found = 1;
476 break;
477 }
478 }
479 if (!found) {
480 DMERR("%s: device %s not in table devices list",
481 dm_device_name(t->md), d->name);
482 goto unlock_ret;
483 }
484 if (refcount_dec_and_test(&dd->count)) {
485 dm_put_table_device(t->md, d);
486 list_del(&dd->list);
487 kfree(dd);
488 }
489
490 unlock_ret:
491 up_write(&t->devices_lock);
492 }
493 EXPORT_SYMBOL(dm_put_device);
494
495 /*
496 * Checks to see if the target joins onto the end of the table.
497 */
498 static int adjoin(struct dm_table *t, struct dm_target *ti)
499 {
500 struct dm_target *prev;
501
502 if (!t->num_targets)
503 return !ti->begin;
504
505 prev = &t->targets[t->num_targets - 1];
506 return (ti->begin == (prev->begin + prev->len));
507 }
508
509 /*
510 * Used to dynamically allocate the arg array.
511 *
512 * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must
513 * process messages even if some device is suspended. These messages have a
514 * small fixed number of arguments.
515 *
516 * On the other hand, dm-switch needs to process bulk data using messages and
517 * excessive use of GFP_NOIO could cause trouble.
518 */
519 static char **realloc_argv(unsigned int *size, char **old_argv)
520 {
521 char **argv;
522 unsigned int new_size;
523 gfp_t gfp;
524
525 if (*size) {
526 new_size = *size * 2;
527 gfp = GFP_KERNEL;
528 } else {
529 new_size = 8;
530 gfp = GFP_NOIO;
531 }
532 argv = kmalloc_array(new_size, sizeof(*argv), gfp);
533 if (argv) {
534 if (old_argv)
535 memcpy(argv, old_argv, *size * sizeof(*argv));
536 *size = new_size;
537 }
538
539 kfree(old_argv);
540 return argv;
541 }
542
543 /*
544 * Destructively splits up the argument list to pass to ctr.
545 */
546 int dm_split_args(int *argc, char ***argvp, char *input)
547 {
548 char *start, *end = input, *out, **argv = NULL;
549 unsigned int array_size = 0;
550
551 *argc = 0;
552
553 if (!input) {
554 *argvp = NULL;
555 return 0;
556 }
557
558 argv = realloc_argv(&array_size, argv);
559 if (!argv)
560 return -ENOMEM;
561
562 while (1) {
563 /* Skip whitespace */
564 start = skip_spaces(end);
565
566 if (!*start)
567 break; /* success, we hit the end */
568
569 /* 'out' is used to remove any back-quotes */
570 end = out = start;
571 while (*end) {
572 /* Everything apart from '\0' can be quoted */
573 if (*end == '\\' && *(end + 1)) {
574 *out++ = *(end + 1);
575 end += 2;
576 continue;
577 }
578
579 if (isspace(*end))
580 break; /* end of token */
581
582 *out++ = *end++;
583 }
584
585 /* have we already filled the array ? */
586 if ((*argc + 1) > array_size) {
587 argv = realloc_argv(&array_size, argv);
588 if (!argv)
589 return -ENOMEM;
590 }
591
592 /* we know this is whitespace */
593 if (*end)
594 end++;
595
596 /* terminate the string and put it in the array */
597 *out = '\0';
598 argv[*argc] = start;
599 (*argc)++;
600 }
601
602 *argvp = argv;
603 return 0;
604 }
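
/*
 * Behaviour sketch: the input is split in place and backslash quoting is
 * stripped, e.g. for a hypothetical buffer
 *
 *	char buf[] = "0 1024 linear /dev/sda 8\\ 1";
 *	int argc;
 *	char **argv;
 *
 *	if (!dm_split_args(&argc, &argv, buf)) {
 *		// argc == 5 and argv[4] is "8 1"; kfree(argv) when done.
 *	}
 */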
605
606 static void dm_set_stacking_limits(struct queue_limits *limits)
607 {
608 blk_set_stacking_limits(limits);
609 limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
610 }
611
612 /*
613 * Impose necessary and sufficient conditions on a device's table such
614 * that any incoming bio which respects its logical_block_size can be
615 * processed successfully. If it falls across the boundary between
616 * two or more targets, the size of each piece it gets split into must
617 * be compatible with the logical_block_size of the target processing it.
618 */
619 static int validate_hardware_logical_block_alignment(struct dm_table *t,
620 struct queue_limits *limits)
621 {
622 /*
623 * This function uses arithmetic modulo the logical_block_size
624 * (in units of 512-byte sectors).
625 */
626 unsigned short device_logical_block_size_sects =
627 limits->logical_block_size >> SECTOR_SHIFT;
628
629 /*
630 * Offset of the start of the next table entry, mod logical_block_size.
631 */
632 unsigned short next_target_start = 0;
633
634 /*
635 * Given an aligned bio that extends beyond the end of a
636 * target, how many sectors must the next target handle?
637 */
638 unsigned short remaining = 0;
639
640 struct dm_target *ti;
641 struct queue_limits ti_limits;
642 unsigned int i;
643
644 /*
645 * Check each entry in the table in turn.
646 */
647 for (i = 0; i < t->num_targets; i++) {
648 ti = dm_table_get_target(t, i);
649
650 dm_set_stacking_limits(&ti_limits);
651
652 /* combine all target devices' limits */
653 if (ti->type->iterate_devices)
654 ti->type->iterate_devices(ti, dm_set_device_limits,
655 &ti_limits);
656
657 /*
658 * If the remaining sectors fall entirely within this
659 * table entry are they compatible with its logical_block_size?
660 */
661 if (remaining < ti->len &&
662 remaining & ((ti_limits.logical_block_size >>
663 SECTOR_SHIFT) - 1))
664 break; /* Error */
665
666 next_target_start =
667 (unsigned short) ((next_target_start + ti->len) &
668 (device_logical_block_size_sects - 1));
669 remaining = next_target_start ?
670 device_logical_block_size_sects - next_target_start : 0;
671 }
672
673 if (remaining) {
674 DMERR("%s: table line %u (start sect %llu len %llu) "
675 "not aligned to h/w logical block size %u",
676 dm_device_name(t->md), i,
677 (unsigned long long) ti->begin,
678 (unsigned long long) ti->len,
679 limits->logical_block_size);
680 return -EINVAL;
681 }
682
683 return 0;
684 }
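
/*
 * Worked example (made-up geometry): with a 4096-byte logical block size,
 * device_logical_block_size_sects == 8. A first target of 1001 sectors
 * leaves next_target_start = 1001 & 7 = 1 and remaining = 7, so the loop
 * above flags the next boundary as misaligned; target lengths that are
 * multiples of 8 sectors keep remaining at 0 and the table validates.
 */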
685
686 int dm_table_add_target(struct dm_table *t, const char *type,
687 sector_t start, sector_t len, char *params)
688 {
689 int r = -EINVAL, argc;
690 char **argv;
691 struct dm_target *ti;
692
693 if (t->singleton) {
694 DMERR("%s: target type %s must appear alone in table",
695 dm_device_name(t->md), t->targets->type->name);
696 return -EINVAL;
697 }
698
699 BUG_ON(t->num_targets >= t->num_allocated);
700
701 ti = t->targets + t->num_targets;
702 memset(ti, 0, sizeof(*ti));
703
704 if (!len) {
705 DMERR("%s: zero-length target", dm_device_name(t->md));
706 return -EINVAL;
707 }
708 if (start + len < start || start + len > LLONG_MAX >> SECTOR_SHIFT) {
709 DMERR("%s: too large device", dm_device_name(t->md));
710 return -EINVAL;
711 }
712
713 ti->type = dm_get_target_type(type);
714 if (!ti->type) {
715 DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
716 return -EINVAL;
717 }
718
719 if (dm_target_needs_singleton(ti->type)) {
720 if (t->num_targets) {
721 ti->error = "singleton target type must appear alone in table";
722 goto bad;
723 }
724 t->singleton = true;
725 }
726
727 if (dm_target_always_writeable(ti->type) &&
728 !(t->mode & BLK_OPEN_WRITE)) {
729 ti->error = "target type may not be included in a read-only table";
730 goto bad;
731 }
732
733 if (t->immutable_target_type) {
734 if (t->immutable_target_type != ti->type) {
735 ti->error = "immutable target type cannot be mixed with other target types";
736 goto bad;
737 }
738 } else if (dm_target_is_immutable(ti->type)) {
739 if (t->num_targets) {
740 ti->error = "immutable target type cannot be mixed with other target types";
741 goto bad;
742 }
743 t->immutable_target_type = ti->type;
744 }
745
746 ti->table = t;
747 ti->begin = start;
748 ti->len = len;
749 ti->error = "Unknown error";
750
751 /*
752 * Does this target adjoin the previous one ?
753 */
754 if (!adjoin(t, ti)) {
755 ti->error = "Gap in table";
756 goto bad;
757 }
758
759 r = dm_split_args(&argc, &argv, params);
760 if (r) {
761 ti->error = "couldn't split parameters";
762 goto bad;
763 }
764
765 r = ti->type->ctr(ti, argc, argv);
766 kfree(argv);
767 if (r)
768 goto bad;
769
770 t->highs[t->num_targets++] = ti->begin + ti->len - 1;
771
772 if (!ti->num_discard_bios && ti->discards_supported)
773 DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
774 dm_device_name(t->md), type);
775
776 if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
777 static_branch_enable(&swap_bios_enabled);
778
779 if (!ti->flush_bypasses_map)
780 t->flush_bypasses_map = false;
781
782 return 0;
783
784 bad:
785 DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
786 dm_put_target_type(ti->type);
787 return r;
788 }
789
790 /*
791 * Target argument parsing helpers.
792 */
793 static int validate_next_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
794 unsigned int *value, char **error, unsigned int grouped)
795 {
796 const char *arg_str = dm_shift_arg(arg_set);
797 char dummy;
798
799 if (!arg_str ||
800 (sscanf(arg_str, "%u%c", value, &dummy) != 1) ||
801 (*value < arg->min) ||
802 (*value > arg->max) ||
803 (grouped && arg_set->argc < *value)) {
804 *error = arg->error;
805 return -EINVAL;
806 }
807
808 return 0;
809 }
810
811 int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
812 unsigned int *value, char **error)
813 {
814 return validate_next_arg(arg, arg_set, value, error, 0);
815 }
816 EXPORT_SYMBOL(dm_read_arg);
817
818 int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set,
819 unsigned int *value, char **error)
820 {
821 return validate_next_arg(arg, arg_set, value, error, 1);
822 }
823 EXPORT_SYMBOL(dm_read_arg_group);
824
825 const char *dm_shift_arg(struct dm_arg_set *as)
826 {
827 char *r;
828
829 if (as->argc) {
830 as->argc--;
831 r = *as->argv;
832 as->argv++;
833 return r;
834 }
835
836 return NULL;
837 }
838 EXPORT_SYMBOL(dm_shift_arg);
839
840 void dm_consume_args(struct dm_arg_set *as, unsigned int num_args)
841 {
842 BUG_ON(as->argc < num_args);
843 as->argc -= num_args;
844 as->argv += num_args;
845 }
846 EXPORT_SYMBOL(dm_consume_args);
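
/*
 * Parsing sketch (names and bounds are hypothetical): read a feature count
 * validated against both its range and the remaining argc, then consume
 * that many words:
 *
 *	static const struct dm_arg _args[] = {
 *		{ 0, 4, "Invalid number of feature arguments" },
 *	};
 *	unsigned int num_features;
 *
 *	r = dm_read_arg_group(_args, &as, &num_features, &ti->error);
 *	if (r)
 *		return r;
 *	dm_consume_args(&as, num_features);
 */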
847
848 static bool __table_type_bio_based(enum dm_queue_mode table_type)
849 {
850 return (table_type == DM_TYPE_BIO_BASED ||
851 table_type == DM_TYPE_DAX_BIO_BASED);
852 }
853
854 static bool __table_type_request_based(enum dm_queue_mode table_type)
855 {
856 return table_type == DM_TYPE_REQUEST_BASED;
857 }
858
859 void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
860 {
861 t->type = type;
862 }
863 EXPORT_SYMBOL_GPL(dm_table_set_type);
864
865 /* validate the dax capability of the target device span */
866 static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
867 sector_t start, sector_t len, void *data)
868 {
869 if (dev->dax_dev)
870 return false;
871
872 DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
873 return true;
874 }
875
876 /* Check devices support synchronous DAX */
877 static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
878 sector_t start, sector_t len, void *data)
879 {
880 return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
881 }
882
883 static bool dm_table_supports_dax(struct dm_table *t,
884 iterate_devices_callout_fn iterate_fn)
885 {
886 /* Ensure that all targets support DAX. */
887 for (unsigned int i = 0; i < t->num_targets; i++) {
888 struct dm_target *ti = dm_table_get_target(t, i);
889
890 if (!ti->type->direct_access)
891 return false;
892
893 if (dm_target_is_wildcard(ti->type) ||
894 !ti->type->iterate_devices ||
895 ti->type->iterate_devices(ti, iterate_fn, NULL))
896 return false;
897 }
898
899 return true;
900 }
901
902 static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
903 sector_t start, sector_t len, void *data)
904 {
905 struct block_device *bdev = dev->bdev;
906 struct request_queue *q = bdev_get_queue(bdev);
907
908 /* request-based cannot stack on partitions! */
909 if (bdev_is_partition(bdev))
910 return false;
911
912 return queue_is_mq(q);
913 }
914
915 static int dm_table_determine_type(struct dm_table *t)
916 {
917 unsigned int bio_based = 0, request_based = 0, hybrid = 0;
918 struct dm_target *ti;
919 struct list_head *devices = dm_table_get_devices(t);
920 enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
921
922 if (t->type != DM_TYPE_NONE) {
923 /* target already set the table's type */
924 if (t->type == DM_TYPE_BIO_BASED) {
925 /* possibly upgrade to a variant of bio-based */
926 goto verify_bio_based;
927 }
928 BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
929 goto verify_rq_based;
930 }
931
932 for (unsigned int i = 0; i < t->num_targets; i++) {
933 ti = dm_table_get_target(t, i);
934 if (dm_target_hybrid(ti))
935 hybrid = 1;
936 else if (dm_target_request_based(ti))
937 request_based = 1;
938 else
939 bio_based = 1;
940
941 if (bio_based && request_based) {
942 DMERR("Inconsistent table: different target types can't be mixed up");
943 return -EINVAL;
944 }
945 }
946
947 if (hybrid && !bio_based && !request_based) {
948 /*
949 * The targets can work either way.
950 * Determine the type from the live device.
951 * Default to bio-based if device is new.
952 */
953 if (__table_type_request_based(live_md_type))
954 request_based = 1;
955 else
956 bio_based = 1;
957 }
958
959 if (bio_based) {
960 verify_bio_based:
961 /* We must use this table as bio-based */
962 t->type = DM_TYPE_BIO_BASED;
963 if (dm_table_supports_dax(t, device_not_dax_capable) ||
964 (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
965 t->type = DM_TYPE_DAX_BIO_BASED;
966 }
967 return 0;
968 }
969
970 BUG_ON(!request_based); /* No targets in this table */
971
972 t->type = DM_TYPE_REQUEST_BASED;
973
974 verify_rq_based:
975 /*
976 * Request-based dm supports only tables that have a single target now.
977 * To support multiple targets, request splitting support is needed,
978 * and that needs lots of changes in the block-layer.
979 * (e.g. request completion process for partial completion.)
980 */
981 if (t->num_targets > 1) {
982 DMERR("request-based DM doesn't support multiple targets");
983 return -EINVAL;
984 }
985
986 if (list_empty(devices)) {
987 int srcu_idx;
988 struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
989
990 /* inherit live table's type */
991 if (live_table)
992 t->type = live_table->type;
993 dm_put_live_table(t->md, srcu_idx);
994 return 0;
995 }
996
997 ti = dm_table_get_immutable_target(t);
998 if (!ti) {
999 DMERR("table load rejected: immutable target is required");
1000 return -EINVAL;
1001 } else if (ti->max_io_len) {
1002 DMERR("table load rejected: immutable target that splits IO is not supported");
1003 return -EINVAL;
1004 }
1005
1006 /* Non-request-stackable devices can't be used for request-based dm */
1007 if (!ti->type->iterate_devices ||
1008 !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
1009 DMERR("table load rejected: including non-request-stackable devices");
1010 return -EINVAL;
1011 }
1012
1013 return 0;
1014 }
1015
1016 enum dm_queue_mode dm_table_get_type(struct dm_table *t)
1017 {
1018 return t->type;
1019 }
1020
1021 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
1022 {
1023 return t->immutable_target_type;
1024 }
1025
1026 struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
1027 {
1028 /* Immutable target is implicitly a singleton */
1029 if (t->num_targets > 1 ||
1030 !dm_target_is_immutable(t->targets[0].type))
1031 return NULL;
1032
1033 return t->targets;
1034 }
1035
1036 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
1037 {
1038 for (unsigned int i = 0; i < t->num_targets; i++) {
1039 struct dm_target *ti = dm_table_get_target(t, i);
1040
1041 if (dm_target_is_wildcard(ti->type))
1042 return ti;
1043 }
1044
1045 return NULL;
1046 }
1047
1048 bool dm_table_request_based(struct dm_table *t)
1049 {
1050 return __table_type_request_based(dm_table_get_type(t));
1051 }
1052
1053 static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
1054 {
1055 enum dm_queue_mode type = dm_table_get_type(t);
1056 unsigned int per_io_data_size = 0, front_pad, io_front_pad;
1057 unsigned int min_pool_size = 0, pool_size;
1058 struct dm_md_mempools *pools;
1059 unsigned int bioset_flags = 0;
1060
1061 if (unlikely(type == DM_TYPE_NONE)) {
1062 DMERR("no table type is set, can't allocate mempools");
1063 return -EINVAL;
1064 }
1065
1066 pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
1067 if (!pools)
1068 return -ENOMEM;
1069
1070 if (type == DM_TYPE_REQUEST_BASED) {
1071 pool_size = dm_get_reserved_rq_based_ios();
1072 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
1073 goto init_bs;
1074 }
1075
1076 if (md->queue->limits.features & BLK_FEAT_POLL)
1077 bioset_flags |= BIOSET_PERCPU_CACHE;
1078
1079 for (unsigned int i = 0; i < t->num_targets; i++) {
1080 struct dm_target *ti = dm_table_get_target(t, i);
1081
1082 per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
1083 min_pool_size = max(min_pool_size, ti->num_flush_bios);
1084 }
1085 pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
1086 front_pad = roundup(per_io_data_size,
1087 __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
1088
1089 io_front_pad = roundup(per_io_data_size,
1090 __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
1091 if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
1092 goto out_free_pools;
1093 init_bs:
1094 if (bioset_init(&pools->bs, pool_size, front_pad, 0))
1095 goto out_free_pools;
1096
1097 t->mempools = pools;
1098 return 0;
1099
1100 out_free_pools:
1101 dm_free_md_mempools(pools);
1102 return -ENOMEM;
1103 }
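
/*
 * front_pad sketch (illustrative numbers only): a bio-based target with
 * per_io_data_size = 192 gets front_pad = roundup(192, alignof(struct
 * dm_target_io)) + DM_TARGET_IO_BIO_OFFSET, i.e. each cloned bio carries
 * the target's per-I/O data immediately in front of the bio itself.
 */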
1104
1105 static int setup_indexes(struct dm_table *t)
1106 {
1107 int i;
1108 unsigned int total = 0;
1109 sector_t *indexes;
1110
1111 /* allocate the space for *all* the indexes */
1112 for (i = t->depth - 2; i >= 0; i--) {
1113 t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
1114 total += t->counts[i];
1115 }
1116
1117 indexes = kvcalloc(total, NODE_SIZE, GFP_KERNEL);
1118 if (!indexes)
1119 return -ENOMEM;
1120
1121 /* set up internal nodes, bottom-up */
1122 for (i = t->depth - 2; i >= 0; i--) {
1123 t->index[i] = indexes;
1124 indexes += (KEYS_PER_NODE * t->counts[i]);
1125 setup_btree_index(i, t);
1126 }
1127
1128 return 0;
1129 }
1130
1131 /*
1132 * Builds the btree to index the map.
1133 */
1134 static int dm_table_build_index(struct dm_table *t)
1135 {
1136 int r = 0;
1137 unsigned int leaf_nodes;
1138
1139 /* how many indexes will the btree have ? */
1140 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
1141 t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
1142
1143 /* leaf layer has already been set up */
1144 t->counts[t->depth - 1] = leaf_nodes;
1145 t->index[t->depth - 1] = t->highs;
1146
1147 if (t->depth >= 2)
1148 r = setup_indexes(t);
1149
1150 return r;
1151 }
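
/*
 * Sizing example (hypothetical table): with KEYS_PER_NODE = 8, a 20-target
 * table has leaf_nodes = dm_div_up(20, 8) = 3, so depth = 1 + int_log(3, 9)
 * = 2 and setup_indexes() allocates a single root node above the leaf level
 * already stored in t->highs.
 */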
1152
1153 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
1154
1155 struct dm_crypto_profile {
1156 struct blk_crypto_profile profile;
1157 struct mapped_device *md;
1158 };
1159
1160 static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
1161 sector_t start, sector_t len, void *data)
1162 {
1163 const struct blk_crypto_key *key = data;
1164
1165 blk_crypto_evict_key(dev->bdev, key);
1166 return 0;
1167 }
1168
1169 /*
1170 * When an inline encryption key is evicted from a device-mapper device, evict
1171 * it from all the underlying devices.
1172 */
1173 static int dm_keyslot_evict(struct blk_crypto_profile *profile,
1174 const struct blk_crypto_key *key, unsigned int slot)
1175 {
1176 struct mapped_device *md =
1177 container_of(profile, struct dm_crypto_profile, profile)->md;
1178 struct dm_table *t;
1179 int srcu_idx;
1180
1181 t = dm_get_live_table(md, &srcu_idx);
1182 if (!t)
1183 goto put_live_table;
1184
1185 for (unsigned int i = 0; i < t->num_targets; i++) {
1186 struct dm_target *ti = dm_table_get_target(t, i);
1187
1188 if (!ti->type->iterate_devices)
1189 continue;
1190 ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
1191 (void *)key);
1192 }
1193
1194 put_live_table:
1195 dm_put_live_table(md, srcu_idx);
1196 return 0;
1197 }
1198
1199 enum dm_wrappedkey_op {
1200 DERIVE_SW_SECRET,
1201 IMPORT_KEY,
1202 GENERATE_KEY,
1203 PREPARE_KEY,
1204 };
1205
1206 struct dm_wrappedkey_op_args {
1207 enum dm_wrappedkey_op op;
1208 int err;
1209 union {
1210 struct {
1211 const u8 *eph_key;
1212 size_t eph_key_size;
1213 u8 *sw_secret;
1214 } derive_sw_secret;
1215 struct {
1216 const u8 *raw_key;
1217 size_t raw_key_size;
1218 u8 *lt_key;
1219 } import_key;
1220 struct {
1221 u8 *lt_key;
1222 } generate_key;
1223 struct {
1224 const u8 *lt_key;
1225 size_t lt_key_size;
1226 u8 *eph_key;
1227 } prepare_key;
1228 };
1229 };
1230
1231 static int dm_wrappedkey_op_callback(struct dm_target *ti, struct dm_dev *dev,
1232 sector_t start, sector_t len, void *data)
1233 {
1234 struct dm_wrappedkey_op_args *args = data;
1235 struct block_device *bdev = dev->bdev;
1236 struct blk_crypto_profile *profile =
1237 bdev_get_queue(bdev)->crypto_profile;
1238 int err = -EOPNOTSUPP;
1239
1240 if (!args->err)
1241 return 0;
1242
1243 switch (args->op) {
1244 case DERIVE_SW_SECRET:
1245 err = blk_crypto_derive_sw_secret(
1246 bdev,
1247 args->derive_sw_secret.eph_key,
1248 args->derive_sw_secret.eph_key_size,
1249 args->derive_sw_secret.sw_secret);
1250 break;
1251 case IMPORT_KEY:
1252 err = blk_crypto_import_key(profile,
1253 args->import_key.raw_key,
1254 args->import_key.raw_key_size,
1255 args->import_key.lt_key);
1256 break;
1257 case GENERATE_KEY:
1258 err = blk_crypto_generate_key(profile,
1259 args->generate_key.lt_key);
1260 break;
1261 case PREPARE_KEY:
1262 err = blk_crypto_prepare_key(profile,
1263 args->prepare_key.lt_key,
1264 args->prepare_key.lt_key_size,
1265 args->prepare_key.eph_key);
1266 break;
1267 }
1268 args->err = err;
1269
1270 /* Try another device in case this fails. */
1271 return 0;
1272 }
1273
1274 static int dm_exec_wrappedkey_op(struct blk_crypto_profile *profile,
1275 struct dm_wrappedkey_op_args *args)
1276 {
1277 struct mapped_device *md =
1278 container_of(profile, struct dm_crypto_profile, profile)->md;
1279 struct dm_target *ti;
1280 struct dm_table *t;
1281 int srcu_idx;
1282 int i;
1283
1284 args->err = -EOPNOTSUPP;
1285
1286 t = dm_get_live_table(md, &srcu_idx);
1287 if (!t)
1288 goto out;
1289
1290 /*
1291 * blk-crypto currently has no support for multiple incompatible
1292 * implementations of wrapped inline crypto keys on a single system.
1293 * It was already checked earlier that support for wrapped keys was
1294 * declared on all underlying devices. Thus, all the underlying devices
1295 * should support all wrapped key operations and they should behave
1296 * identically, i.e. work with the same keys. So, just executing the
1297 * operation on the first device on which it works suffices for now.
1298 */
1299 for (i = 0; i < t->num_targets; i++) {
1300 ti = dm_table_get_target(t, i);
1301 if (!ti->type->iterate_devices)
1302 continue;
1303 ti->type->iterate_devices(ti, dm_wrappedkey_op_callback, args);
1304 if (!args->err)
1305 break;
1306 }
1307 out:
1308 dm_put_live_table(md, srcu_idx);
1309 return args->err;
1310 }
1311
1312 static int dm_derive_sw_secret(struct blk_crypto_profile *profile,
1313 const u8 *eph_key, size_t eph_key_size,
1314 u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
1315 {
1316 struct dm_wrappedkey_op_args args = {
1317 .op = DERIVE_SW_SECRET,
1318 .derive_sw_secret = {
1319 .eph_key = eph_key,
1320 .eph_key_size = eph_key_size,
1321 .sw_secret = sw_secret,
1322 },
1323 };
1324 return dm_exec_wrappedkey_op(profile, &args);
1325 }
1326
1327 static int dm_import_key(struct blk_crypto_profile *profile,
1328 const u8 *raw_key, size_t raw_key_size,
1329 u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
1330 {
1331 struct dm_wrappedkey_op_args args = {
1332 .op = IMPORT_KEY,
1333 .import_key = {
1334 .raw_key = raw_key,
1335 .raw_key_size = raw_key_size,
1336 .lt_key = lt_key,
1337 },
1338 };
1339 return dm_exec_wrappedkey_op(profile, &args);
1340 }
1341
1342 static int dm_generate_key(struct blk_crypto_profile *profile,
1343 u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
1344 {
1345 struct dm_wrappedkey_op_args args = {
1346 .op = GENERATE_KEY,
1347 .generate_key = {
1348 .lt_key = lt_key,
1349 },
1350 };
1351 return dm_exec_wrappedkey_op(profile, &args);
1352 }
1353
1354 static int dm_prepare_key(struct blk_crypto_profile *profile,
1355 const u8 *lt_key, size_t lt_key_size,
1356 u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
1357 {
1358 struct dm_wrappedkey_op_args args = {
1359 .op = PREPARE_KEY,
1360 .prepare_key = {
1361 .lt_key = lt_key,
1362 .lt_key_size = lt_key_size,
1363 .eph_key = eph_key,
1364 },
1365 };
1366 return dm_exec_wrappedkey_op(profile, &args);
1367 }
1368
1369 static int
1370 device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev,
1371 sector_t start, sector_t len, void *data)
1372 {
1373 struct blk_crypto_profile *parent = data;
1374 struct blk_crypto_profile *child =
1375 bdev_get_queue(dev->bdev)->crypto_profile;
1376
1377 blk_crypto_intersect_capabilities(parent, child);
1378 return 0;
1379 }
1380
1381 void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
1382 {
1383 struct dm_crypto_profile *dmcp = container_of(profile,
1384 struct dm_crypto_profile,
1385 profile);
1386
1387 if (!profile)
1388 return;
1389
1390 blk_crypto_profile_destroy(profile);
1391 kfree(dmcp);
1392 }
1393
1394 static void dm_table_destroy_crypto_profile(struct dm_table *t)
1395 {
1396 dm_destroy_crypto_profile(t->crypto_profile);
1397 t->crypto_profile = NULL;
1398 }
1399
1400 /*
1401 * Constructs and initializes t->crypto_profile with a crypto profile that
1402 * represents the common set of crypto capabilities of the devices described by
1403 * the dm_table. However, if the constructed crypto profile doesn't support all
1404 * crypto capabilities that are supported by the current mapped_device, it
1405 * returns an error instead, since we don't support removing crypto capabilities
1406 * on table changes. Finally, if the constructed crypto profile is "empty" (has
1407 * no crypto capabilities at all), it just sets t->crypto_profile to NULL.
1408 */
1409 static int dm_table_construct_crypto_profile(struct dm_table *t)
1410 {
1411 struct dm_crypto_profile *dmcp;
1412 struct blk_crypto_profile *profile;
1413 unsigned int i;
1414 bool empty_profile = true;
1415
1416 dmcp = kmalloc(sizeof(*dmcp), GFP_KERNEL);
1417 if (!dmcp)
1418 return -ENOMEM;
1419 dmcp->md = t->md;
1420
1421 profile = &dmcp->profile;
1422 blk_crypto_profile_init(profile, 0);
1423 profile->ll_ops.keyslot_evict = dm_keyslot_evict;
1424 profile->max_dun_bytes_supported = UINT_MAX;
1425 memset(profile->modes_supported, 0xFF,
1426 sizeof(profile->modes_supported));
1427 profile->key_types_supported = ~0;
1428
1429 for (i = 0; i < t->num_targets; i++) {
1430 struct dm_target *ti = dm_table_get_target(t, i);
1431
1432 if (!dm_target_passes_crypto(ti->type)) {
1433 blk_crypto_intersect_capabilities(profile, NULL);
1434 break;
1435 }
1436 if (!ti->type->iterate_devices)
1437 continue;
1438 ti->type->iterate_devices(ti,
1439 device_intersect_crypto_capabilities,
1440 profile);
1441 }
1442
1443 if (profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED) {
1444 profile->ll_ops.derive_sw_secret = dm_derive_sw_secret;
1445 profile->ll_ops.import_key = dm_import_key;
1446 profile->ll_ops.generate_key = dm_generate_key;
1447 profile->ll_ops.prepare_key = dm_prepare_key;
1448 }
1449
1450 if (t->md->queue &&
1451 !blk_crypto_has_capabilities(profile,
1452 t->md->queue->crypto_profile)) {
1453 DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
1454 dm_destroy_crypto_profile(profile);
1455 return -EINVAL;
1456 }
1457
1458 /*
1459 * If the new profile doesn't actually support any crypto capabilities,
1460 * we may as well represent it with a NULL profile.
1461 */
1462 for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) {
1463 if (profile->modes_supported[i]) {
1464 empty_profile = false;
1465 break;
1466 }
1467 }
1468
1469 if (empty_profile) {
1470 dm_destroy_crypto_profile(profile);
1471 profile = NULL;
1472 }
1473
1474 /*
1475 * t->crypto_profile is only set temporarily while the table is being
1476 * set up, and it gets set to NULL after the profile has been
1477 * transferred to the request_queue.
1478 */
1479 t->crypto_profile = profile;
1480
1481 return 0;
1482 }
1483
1484 static void dm_update_crypto_profile(struct request_queue *q,
1485 struct dm_table *t)
1486 {
1487 if (!t->crypto_profile)
1488 return;
1489
1490 /* Make the crypto profile less restrictive. */
1491 if (!q->crypto_profile) {
1492 blk_crypto_register(t->crypto_profile, q);
1493 } else {
1494 blk_crypto_update_capabilities(q->crypto_profile,
1495 t->crypto_profile);
1496 dm_destroy_crypto_profile(t->crypto_profile);
1497 }
1498 t->crypto_profile = NULL;
1499 }
1500
1501 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
1502
1503 static int dm_table_construct_crypto_profile(struct dm_table *t)
1504 {
1505 return 0;
1506 }
1507
1508 void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
1509 {
1510 }
1511
1512 static void dm_table_destroy_crypto_profile(struct dm_table *t)
1513 {
1514 }
1515
1516 static void dm_update_crypto_profile(struct request_queue *q,
1517 struct dm_table *t)
1518 {
1519 }
1520
1521 #endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
1522
1523 /*
1524 * Prepares the table for use by building the indices,
1525 * setting the type, and allocating mempools.
1526 */
1527 int dm_table_complete(struct dm_table *t)
1528 {
1529 int r;
1530
1531 r = dm_table_determine_type(t);
1532 if (r) {
1533 DMERR("unable to determine table type");
1534 return r;
1535 }
1536
1537 r = dm_table_build_index(t);
1538 if (r) {
1539 DMERR("unable to build btrees");
1540 return r;
1541 }
1542
1543 r = dm_table_construct_crypto_profile(t);
1544 if (r) {
1545 DMERR("could not construct crypto profile.");
1546 return r;
1547 }
1548
1549 r = dm_table_alloc_md_mempools(t, t->md);
1550 if (r)
1551 DMERR("unable to allocate mempools");
1552
1553 return r;
1554 }
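
/*
 * Load-time ordering sketch: dm_table_complete() is the final step of a
 * table load, after dm_table_create() and one dm_table_add_target() call
 * per table line; on failure the caller can still dispose of the table
 * with dm_table_destroy().
 */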
1555
1556 static DEFINE_MUTEX(_event_lock);
1557 void dm_table_event_callback(struct dm_table *t,
1558 void (*fn)(void *), void *context)
1559 {
1560 mutex_lock(&_event_lock);
1561 t->event_fn = fn;
1562 t->event_context = context;
1563 mutex_unlock(&_event_lock);
1564 }
1565
1566 void dm_table_event(struct dm_table *t)
1567 {
1568 mutex_lock(&_event_lock);
1569 if (t->event_fn)
1570 t->event_fn(t->event_context);
1571 mutex_unlock(&_event_lock);
1572 }
1573 EXPORT_SYMBOL(dm_table_event);
1574
1575 inline sector_t dm_table_get_size(struct dm_table *t)
1576 {
1577 return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
1578 }
1579 EXPORT_SYMBOL(dm_table_get_size);
1580
1581 /*
1582 * Search the btree for the correct target.
1583 *
1584 * Caller should check returned pointer for NULL
1585 * to trap I/O beyond end of device.
1586 */
1587 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
1588 {
1589 unsigned int l, n = 0, k = 0;
1590 sector_t *node;
1591
1592 if (unlikely(sector >= dm_table_get_size(t)))
1593 return NULL;
1594
1595 for (l = 0; l < t->depth; l++) {
1596 n = get_child(n, k);
1597 node = get_node(t, l, n);
1598
1599 for (k = 0; k < KEYS_PER_NODE; k++)
1600 if (node[k] >= sector)
1601 break;
1602 }
1603
1604 return &t->targets[(KEYS_PER_NODE * n) + k];
1605 }
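
/*
 * Caller sketch (hypothetical; mirrors the NULL check described above):
 *
 *	struct dm_target *ti = dm_table_find_target(t, sector);
 *
 *	if (!ti)
 *		return -EIO;	// sector lies beyond the end of the table
 */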
1606
1607 /*
1608 * type->iterate_devices() should be called when the sanity check needs to
1609 * iterate and check all underlying data devices. iterate_devices() will
1610 * iterate all underlying data devices until it encounters a non-zero return
1611 * code, returned either by the supplied iterate_devices_callout_fn or by
1612 * iterate_devices() itself internally.
1613 *
1614 * For some target type (e.g. dm-stripe), one call of iterate_devices() may
1615 * iterate multiple underlying devices internally, in which case a non-zero
1616 * return code returned by iterate_devices_callout_fn will stop the iteration
1617 * in advance.
1618 *
1619 * Cases requiring _any_ underlying device to support some kind of attribute
1620 * should use an iteration structure like dm_table_any_dev_attr(), or call
1621 * it directly. @func should handle the semantics of positive examples, e.g.
1622 * capable of something.
1623 *
1624 * Cases requiring _all_ underlying devices to support some kind of attribute
1625 * should use the iteration structure like dm_table_supports_nowait() or
1626 * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
1627 * uses an @anti_func that handles the semantics of counterexamples, e.g. not
1628 * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
1629 */
1630 static bool dm_table_any_dev_attr(struct dm_table *t,
1631 iterate_devices_callout_fn func, void *data)
1632 {
1633 for (unsigned int i = 0; i < t->num_targets; i++) {
1634 struct dm_target *ti = dm_table_get_target(t, i);
1635
1636 if (ti->type->iterate_devices &&
1637 ti->type->iterate_devices(ti, func, data))
1638 return true;
1639 }
1640
1641 return false;
1642 }
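
/*
 * Pattern sketch: an "all devices support X" query is normally written as
 * the negation of an "any device lacks X" iteration, e.g. (hypothetical
 * names)
 *
 *	static bool dm_table_supports_foo(struct dm_table *t)
 *	{
 *		return !dm_table_any_dev_attr(t, device_not_foo_capable, NULL);
 *	}
 */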
1643
1644 static int count_device(struct dm_target *ti, struct dm_dev *dev,
1645 sector_t start, sector_t len, void *data)
1646 {
1647 unsigned int *num_devices = data;
1648
1649 (*num_devices)++;
1650
1651 return 0;
1652 }
1653
1654 /*
1655 * Check whether a table has no data devices attached using each
1656 * target's iterate_devices method.
1657 * Returns false if the result is unknown because a target doesn't
1658 * support iterate_devices.
1659 */
1660 bool dm_table_has_no_data_devices(struct dm_table *t)
1661 {
1662 for (unsigned int i = 0; i < t->num_targets; i++) {
1663 struct dm_target *ti = dm_table_get_target(t, i);
1664 unsigned int num_devices = 0;
1665
1666 if (!ti->type->iterate_devices)
1667 return false;
1668
1669 ti->type->iterate_devices(ti, count_device, &num_devices);
1670 if (num_devices)
1671 return false;
1672 }
1673
1674 return true;
1675 }
1676
1677 bool dm_table_is_wildcard(struct dm_table *t)
1678 {
1679 for (unsigned int i = 0; i < t->num_targets; i++) {
1680 struct dm_target *ti = dm_table_get_target(t, i);
1681
1682 if (!dm_target_is_wildcard(ti->type))
1683 return false;
1684 }
1685
1686 return true;
1687 }
1688
1689 static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev,
1690 sector_t start, sector_t len, void *data)
1691 {
1692 bool *zoned = data;
1693
1694 return bdev_is_zoned(dev->bdev) != *zoned;
1695 }
1696
1697 static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
1698 sector_t start, sector_t len, void *data)
1699 {
1700 return bdev_is_zoned(dev->bdev);
1701 }
1702
1703 /*
1704 * Check the device zoned model based on the target feature flag. If the target
1705 * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
1706 * also accepted but all devices must have the same zoned model. If the target
1707 * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
1708 * zoned model with all zoned devices having the same zone size.
1709 */
1710 static bool dm_table_supports_zoned(struct dm_table *t, bool zoned)
1711 {
1712 for (unsigned int i = 0; i < t->num_targets; i++) {
1713 struct dm_target *ti = dm_table_get_target(t, i);
1714
1715 /*
1716 * For the wildcard target (dm-error), if we do not have a
1717 * backing device, we must always return false. If we have a
1718 * backing device, the result must depend on checking zoned
1719 * model, like for any other target. So for this, check directly
1720 * if the target backing device is zoned as we get "false" when
1721 * dm-error was set without a backing device.
1722 */
1723 if (dm_target_is_wildcard(ti->type) &&
1724 !ti->type->iterate_devices(ti, device_is_zoned_model, NULL))
1725 return false;
1726
1727 if (dm_target_supports_zoned_hm(ti->type)) {
1728 if (!ti->type->iterate_devices ||
1729 ti->type->iterate_devices(ti, device_not_zoned,
1730 &zoned))
1731 return false;
1732 } else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
1733 if (zoned)
1734 return false;
1735 }
1736 }
1737
1738 return true;
1739 }
1740
1741 static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
1742 sector_t start, sector_t len, void *data)
1743 {
1744 unsigned int *zone_sectors = data;
1745
1746 if (!bdev_is_zoned(dev->bdev))
1747 return 0;
1748 return bdev_zone_sectors(dev->bdev) != *zone_sectors;
1749 }
1750
1751 /*
1752 * Check consistency of zoned model and zone sectors across all targets. For
1753 * zone sectors, if the destination device is a zoned block device, it shall
1754 * have the specified zone_sectors.
1755 */
1756 static int validate_hardware_zoned(struct dm_table *t, bool zoned,
1757 unsigned int zone_sectors)
1758 {
1759 if (!zoned)
1760 return 0;
1761
1762 if (!dm_table_supports_zoned(t, zoned)) {
1763 DMERR("%s: zoned model is not consistent across all devices",
1764 dm_device_name(t->md));
1765 return -EINVAL;
1766 }
1767
1768 /* Check zone size validity and compatibility */
1769 if (!zone_sectors || !is_power_of_2(zone_sectors))
1770 return -EINVAL;
1771
1772 if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
1773 DMERR("%s: zone sectors is not consistent across all zoned devices",
1774 dm_device_name(t->md));
1775 return -EINVAL;
1776 }
1777
1778 return 0;
1779 }
1780
1781 /*
1782 * Establish the new table's queue_limits and validate them.
1783 */
1784 int dm_calculate_queue_limits(struct dm_table *t,
1785 struct queue_limits *limits)
1786 {
1787 struct queue_limits ti_limits;
1788 unsigned int zone_sectors = 0;
1789 bool zoned = false;
1790
1791 dm_set_stacking_limits(limits);
1792
1793 t->integrity_supported = true;
1794 for (unsigned int i = 0; i < t->num_targets; i++) {
1795 struct dm_target *ti = dm_table_get_target(t, i);
1796
1797 if (!dm_target_passes_integrity(ti->type))
1798 t->integrity_supported = false;
1799 }
1800
1801 for (unsigned int i = 0; i < t->num_targets; i++) {
1802 struct dm_target *ti = dm_table_get_target(t, i);
1803
1804 dm_set_stacking_limits(&ti_limits);
1805
1806 if (!ti->type->iterate_devices) {
1807 /* Set I/O hints portion of queue limits */
1808 if (ti->type->io_hints)
1809 ti->type->io_hints(ti, &ti_limits);
1810 goto combine_limits;
1811 }
1812
1813 /*
1814 * Combine queue limits of all the devices this target uses.
1815 */
1816 ti->type->iterate_devices(ti, dm_set_device_limits,
1817 &ti_limits);
1818
1819 if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) {
1820 /*
1821 * After stacking all limits, validate all devices
1822 * in table support this zoned model and zone sectors.
1823 */
1824 zoned = (ti_limits.features & BLK_FEAT_ZONED);
1825 zone_sectors = ti_limits.chunk_sectors;
1826 }
1827
1828 /* Set I/O hints portion of queue limits */
1829 if (ti->type->io_hints)
1830 ti->type->io_hints(ti, &ti_limits);
1831
1832 /*
1833 * Check each device area is consistent with the target's
1834 * overall queue limits.
1835 */
1836 if (ti->type->iterate_devices(ti, device_area_is_invalid,
1837 &ti_limits))
1838 return -EINVAL;
1839
1840 combine_limits:
1841 /*
1842 * Merge this target's queue limits into the overall limits
1843 * for the table.
1844 */
1845 if (blk_stack_limits(limits, &ti_limits, 0) < 0)
1846 DMWARN("%s: adding target device (start sect %llu len %llu) "
1847 "caused an alignment inconsistency",
1848 dm_device_name(t->md),
1849 (unsigned long long) ti->begin,
1850 (unsigned long long) ti->len);
1851
1852 if (t->integrity_supported ||
1853 dm_target_has_integrity(ti->type)) {
1854 if (!queue_limits_stack_integrity(limits, &ti_limits)) {
1855 DMWARN("%s: adding target device (start sect %llu len %llu) "
1856 "disabled integrity support due to incompatibility",
1857 dm_device_name(t->md),
1858 (unsigned long long) ti->begin,
1859 (unsigned long long) ti->len);
1860 t->integrity_supported = false;
1861 }
1862 }
1863 }
1864
	/*
	 * Verify that the zoned model and zone sectors, as determined before
	 * any .io_hints override, are the same across all devices in the
	 * table.
	 * - this is especially relevant if .io_hints is emulating a
	 *   drive-managed zoned model on host-managed zoned block devices.
	 * BUT...
	 */
	if (limits->features & BLK_FEAT_ZONED) {
		/*
		 * ...IF the above limits stacking determined a zoned model
		 * validate that all of the table's devices conform to it.
		 */
		zoned = limits->features & BLK_FEAT_ZONED;
		zone_sectors = limits->chunk_sectors;
	}
	if (validate_hardware_zoned(t, zoned, zone_sectors))
		return -EINVAL;

	return validate_hardware_logical_block_alignment(t, limits);
}
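
/*
 * Worked example (illustrative numbers, not from a real target): stacking a
 * device with a 512 B logical block size and max_sectors = 1024 on top of one
 * with a 4096 B logical block size and max_sectors = 256 yields
 * logical_block_size = 4096 and max_sectors = 256, because blk_stack_limits()
 * keeps the most restrictive value of each limit (the larger block size, the
 * smaller transfer cap).  Note that a negative return from blk_stack_limits()
 * only triggers the alignment DMWARN above; the table load itself is failed
 * later only if validate_hardware_logical_block_alignment() finds a target
 * boundary that cannot honour the combined logical block size.
 */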

/*
 * Check if a target requires flush support even if none of the underlying
 * devices need it (e.g. to persist target-specific metadata).
 */
static bool dm_table_supports_flush(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->num_flush_bios && ti->flush_supported)
			return true;
	}

	return false;
}
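
/*
 * Illustrative note: a target typically opts in from its constructor by
 * setting, for example, ti->num_flush_bios = 1 and ti->flush_supported = true,
 * so that REQ_PREFLUSH/REQ_FUA bios reach it even when every underlying data
 * device reports a write-through cache.  dm_table_set_restrictions() below
 * translates this into BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA on the queue.
 */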

static int device_dax_write_cache_enabled(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	struct dax_device *dax_dev = dev->dax_dev;

	if (!dax_dev)
		return false;

	if (dax_write_cache_enabled(dax_dev))
		return true;
	return false;
}

static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
					   sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	int b;

	mutex_lock(&q->limits_lock);
	b = !q->limits.max_write_zeroes_sectors;
	mutex_unlock(&q->limits_lock);
	return b;
}

static bool dm_table_supports_write_zeroes(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_write_zeroes_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
			return false;
	}

	return true;
}
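
/*
 * Pattern note (illustrative): the dm_table_supports_*() helpers pair a
 * *negative* per-device predicate ("device_not_..._capable") with
 * ->iterate_devices(), which reports a nonzero result if the callback returns
 * nonzero for any device the target uses.  A capability therefore survives
 * only if the target advertises it (here via ti->num_write_zeroes_bios) and
 * no underlying device lacks it; a missing ->iterate_devices hook is treated
 * conservatively as "not capable".
 */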

static bool dm_table_supports_nowait(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!dm_target_supports_nowait(ti->type))
			return false;
	}

	return true;
}

static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				      sector_t start, sector_t len, void *data)
{
	return !bdev_max_discard_sectors(dev->bdev);
}

static bool dm_table_supports_discards(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_discard_bios)
			return false;

		/*
		 * Either the target provides discard support (as implied by setting
		 * 'discards_supported') or it relies on _all_ data devices having
		 * discard support.
		 */
		if (!ti->discards_supported &&
		    (!ti->type->iterate_devices ||
		     ti->type->iterate_devices(ti, device_not_discard_capable, NULL)))
			return false;
	}

	return true;
}
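
/*
 * Illustrative example: a target that sets ti->discards_supported (because it
 * can complete discards itself, e.g. by dropping its own mappings) keeps
 * discard support even if one of its data devices cannot discard.  A plain
 * pass-through style target that leaves the flag clear loses discard support
 * for the whole table as soon as any of its devices reports
 * bdev_max_discard_sectors() == 0.
 */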

static int device_not_secure_erase_capable(struct dm_target *ti,
					   struct dm_dev *dev, sector_t start,
					   sector_t len, void *data)
{
	return !bdev_max_secure_erase_sectors(dev->bdev);
}

static bool dm_table_supports_secure_erase(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_secure_erase_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_atomic_write_capable(struct dm_target *ti,
					   struct dm_dev *dev, sector_t start,
					   sector_t len, void *data)
{
	return !bdev_can_atomic_write(dev->bdev);
}

static bool dm_table_supports_atomic_writes(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!dm_target_supports_atomic_writes(ti->type))
			return false;

		if (!ti->type->iterate_devices)
			return false;

		if (ti->type->iterate_devices(ti,
				device_not_atomic_write_capable, NULL)) {
			return false;
		}
	}
	return true;
}
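
/*
 * Illustrative note: BLK_FEAT_ATOMIC_WRITES is only advertised when both the
 * target type opts in (dm_target_supports_atomic_writes()) and every
 * underlying device reports bdev_can_atomic_write(); a single incapable
 * device in any target drops the feature for the whole table.
 */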

bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
				   sector_t new_size)
{
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) &&
	    old_size != new_size) {
		DMWARN("%s: device has zone write plug resources. "
		       "Cannot change size",
		       dm_device_name(t->md));
		return false;
	}
	return true;
}

int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
			      struct queue_limits *limits)
{
	int r;
	struct queue_limits old_limits;

	if (!dm_table_supports_nowait(t))
		limits->features &= ~BLK_FEAT_NOWAIT;

	/*
	 * The current polling implementation does not support request based
	 * stacking.
	 */
	if (!__table_type_bio_based(t->type))
		limits->features &= ~BLK_FEAT_POLL;

	if (!dm_table_supports_discards(t)) {
		limits->max_hw_discard_sectors = 0;
		limits->discard_granularity = 0;
		limits->discard_alignment = 0;
	}

	if (!dm_table_supports_write_zeroes(t))
		limits->max_write_zeroes_sectors = 0;

	if (!dm_table_supports_secure_erase(t))
		limits->max_secure_erase_sectors = 0;

	if (dm_table_supports_flush(t))
		limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;

	if (dm_table_supports_dax(t, device_not_dax_capable))
		limits->features |= BLK_FEAT_DAX;
	else
		limits->features &= ~BLK_FEAT_DAX;

	/* For a zoned table, setup the zone related queue attributes. */
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
		if (limits->features & BLK_FEAT_ZONED) {
			r = dm_set_zones_restrictions(t, q, limits);
			if (r)
				return r;
		} else if (dm_has_zone_plugs(t->md)) {
			DMWARN("%s: device has zone write plug resources. "
			       "Cannot switch to non-zoned table.",
			       dm_device_name(t->md));
			return -EINVAL;
		}
	}

	if (dm_table_supports_atomic_writes(t))
		limits->features |= BLK_FEAT_ATOMIC_WRITES;

	old_limits = queue_limits_start_update(q);
	r = queue_limits_commit_update(q, limits);
	if (r)
		return r;

	/*
	 * Now that the limits are set, check the zones mapped by the table
	 * and setup the resources for zone append emulation if necessary.
	 */
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
	    (limits->features & BLK_FEAT_ZONED)) {
		r = dm_revalidate_zones(t, q);
		if (r) {
			queue_limits_set(q, &old_limits);
			return r;
		}
	}

	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
		dm_finalize_zone_settings(t, limits);

	if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
		set_dax_synchronous(t->md->dax_dev);

	if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
		dax_write_cache(t->md->dax_dev, true);

	dm_update_crypto_profile(q, t);
	return 0;
}
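
/*
 * Ordering note (illustrative): the new limits are committed to the queue
 * before dm_revalidate_zones() runs, because zone revalidation reads the
 * freshly stacked zone geometry from the queue.  queue_limits_start_update()
 * hands back a copy of the previously active limits, so if revalidation fails
 * queue_limits_set(q, &old_limits) restores them and a failed table swap does
 * not leave the queue half-configured.
 */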

struct list_head *dm_table_get_devices(struct dm_table *t)
{
	return &t->devices;
}

blk_mode_t dm_table_get_mode(struct dm_table *t)
{
	return t->mode;
}
EXPORT_SYMBOL(dm_table_get_mode);

enum suspend_mode {
	PRESUSPEND,
	PRESUSPEND_UNDO,
	POSTSUSPEND,
};

static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
	lockdep_assert_held(&t->md->suspend_lock);

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		switch (mode) {
		case PRESUSPEND:
			if (ti->type->presuspend)
				ti->type->presuspend(ti);
			break;
		case PRESUSPEND_UNDO:
			if (ti->type->presuspend_undo)
				ti->type->presuspend_undo(ti);
			break;
		case POSTSUSPEND:
			if (ti->type->postsuspend)
				ti->type->postsuspend(ti);
			break;
		}
	}
}
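
/*
 * Illustrative note on the hook ordering: a normal suspend calls each
 * target's presuspend hook while I/O may still be in flight, quiesces I/O,
 * and then calls postsuspend.  If the suspend has to be aborted after
 * presuspend, the presuspend_undo wrappers below give targets a chance to
 * revert whatever presuspend set up.
 */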

void dm_table_presuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND);
}

void dm_table_presuspend_undo_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND_UNDO);
}

void dm_table_postsuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, POSTSUSPEND);
}

int dm_table_resume_targets(struct dm_table *t)
{
	unsigned int i;
	int r = 0;

	lockdep_assert_held(&t->md->suspend_lock);

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->type->preresume)
			continue;

		r = ti->type->preresume(ti);
		if (r) {
			DMERR("%s: %s: preresume failed, error = %d",
			      dm_device_name(t->md), ti->type->name, r);
			return r;
		}
	}

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->type->resume)
			ti->type->resume(ti);
	}

	return 0;
}
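
/*
 * Illustrative note: resume is deliberately two-pass.  Every target's
 * preresume hook must succeed first (any failure aborts the resume and is
 * reported via DMERR), and only then are the resume hooks run, so no target
 * sees resume() while another target is still unable to proceed.
 */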

struct mapped_device *dm_table_get_md(struct dm_table *t)
{
	return t->md;
}
EXPORT_SYMBOL(dm_table_get_md);

const char *dm_table_device_name(struct dm_table *t)
{
	return dm_device_name(t->md);
}
EXPORT_SYMBOL_GPL(dm_table_device_name);

void dm_table_run_md_queue_async(struct dm_table *t)
{
	if (!dm_table_request_based(t))
		return;

	if (t->md->queue)
		blk_mq_run_hw_queues(t->md->queue, true);
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);