1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2001 Sistina Software (UK) Limited.
4  * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
5  *
6  * This file is released under the GPL.
7  */
8 
9 #include "dm-core.h"
10 #include "dm-rq.h"
11 
12 #include <linux/module.h>
13 #include <linux/vmalloc.h>
14 #include <linux/blkdev.h>
15 #include <linux/blk-integrity.h>
16 #include <linux/namei.h>
17 #include <linux/ctype.h>
18 #include <linux/string.h>
19 #include <linux/slab.h>
20 #include <linux/interrupt.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/atomic.h>
24 #include <linux/blk-mq.h>
25 #include <linux/mount.h>
26 #include <linux/dax.h>
27 
28 #define DM_MSG_PREFIX "table"
29 
30 #define NODE_SIZE L1_CACHE_BYTES
31 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
32 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
33 
34 /*
35  * Similar to ceiling(log_base(n))
36  */
37 static unsigned int int_log(unsigned int n, unsigned int base)
38 {
39 	int result = 0;
40 
41 	while (n > 1) {
42 		n = dm_div_up(n, base);
43 		result++;
44 	}
45 
46 	return result;
47 }
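/*
 * Worked example (a sketch assuming a 64-byte L1 cacheline and an 8-byte
 * sector_t, so KEYS_PER_NODE = 8 and CHILDREN_PER_NODE = 9): indexing 125
 * leaf nodes needs int_log(125, 9) internal levels, and the loop runs
 * 125 -> dm_div_up(125, 9) = 14 -> dm_div_up(14, 9) = 2 -> dm_div_up(2, 9) = 1,
 * so it returns 3.
 */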
48 
49 /*
50  * Calculate the index of the child node of the n'th node k'th key.
51  */
52 static inline unsigned int get_child(unsigned int n, unsigned int k)
53 {
54 	return (n * CHILDREN_PER_NODE) + k;
55 }
56 
57 /*
58  * Return the n'th node of level l from table t.
59  */
60 static inline sector_t *get_node(struct dm_table *t,
61 				 unsigned int l, unsigned int n)
62 {
63 	return t->index[l] + (n * KEYS_PER_NODE);
64 }
65 
66 /*
67  * Return the highest key that you could lookup from the n'th
68  * node on level l of the btree.
69  */
70 static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
71 {
72 	for (; l < t->depth - 1; l++)
73 		n = get_child(n, CHILDREN_PER_NODE - 1);
74 
75 	if (n >= t->counts[l])
76 		return (sector_t) -1;
77 
78 	return get_node(t, l, n)[KEYS_PER_NODE - 1];
79 }
80 
81 /*
82  * Fills in a level of the btree based on the highs of the level
83  * below it.
84  */
85 static int setup_btree_index(unsigned int l, struct dm_table *t)
86 {
87 	unsigned int n, k;
88 	sector_t *node;
89 
90 	for (n = 0U; n < t->counts[l]; n++) {
91 		node = get_node(t, l, n);
92 
93 		for (k = 0U; k < KEYS_PER_NODE; k++)
94 			node[k] = high(t, l + 1, get_child(n, k));
95 	}
96 
97 	return 0;
98 }
99 
100 /*
101  * highs and targets are managed as dynamic arrays during a
102  * table load.
103  */
104 static int alloc_targets(struct dm_table *t, unsigned int num)
105 {
106 	sector_t *n_highs;
107 	struct dm_target *n_targets;
108 
109 	/*
110 	 * Allocate both the target array and offset array at once.
111 	 */
112 	n_highs = kvcalloc(num, sizeof(struct dm_target) + sizeof(sector_t),
113 			   GFP_KERNEL);
114 	if (!n_highs)
115 		return -ENOMEM;
116 
117 	n_targets = (struct dm_target *) (n_highs + num);
118 
119 	memset(n_highs, -1, sizeof(*n_highs) * num);
120 
121 	t->num_allocated = num;
122 	t->highs = n_highs;
123 	t->targets = n_targets;
124 
125 	return 0;
126 }
127 
128 int dm_table_create(struct dm_table **result, blk_mode_t mode,
129 		    unsigned int num_targets, struct mapped_device *md)
130 {
131 	struct dm_table *t;
132 
133 	if (num_targets > DM_MAX_TARGETS)
134 		return -EOVERFLOW;
135 
136 	t = kzalloc(sizeof(*t), GFP_KERNEL);
137 
138 	if (!t)
139 		return -ENOMEM;
140 
141 	INIT_LIST_HEAD(&t->devices);
142 	init_rwsem(&t->devices_lock);
143 
144 	if (!num_targets)
145 		num_targets = KEYS_PER_NODE;
146 
147 	num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
148 
149 	if (!num_targets) {
150 		kfree(t);
151 		return -EOVERFLOW;
152 	}
153 
154 	if (alloc_targets(t, num_targets)) {
155 		kfree(t);
156 		return -ENOMEM;
157 	}
158 
159 	t->type = DM_TYPE_NONE;
160 	t->mode = mode;
161 	t->md = md;
162 	t->flush_bypasses_map = true;
163 	*result = t;
164 	return 0;
165 }
166 
167 static void free_devices(struct list_head *devices, struct mapped_device *md)
168 {
169 	struct list_head *tmp, *next;
170 
171 	list_for_each_safe(tmp, next, devices) {
172 		struct dm_dev_internal *dd =
173 		    list_entry(tmp, struct dm_dev_internal, list);
174 		DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
175 		       dm_device_name(md), dd->dm_dev->name);
176 		dm_put_table_device(md, dd->dm_dev);
177 		kfree(dd);
178 	}
179 }
180 
181 static void dm_table_destroy_crypto_profile(struct dm_table *t);
182 
183 void dm_table_destroy(struct dm_table *t)
184 {
185 	if (!t)
186 		return;
187 
188 	/* free the indexes */
189 	if (t->depth >= 2)
190 		kvfree(t->index[t->depth - 2]);
191 
192 	/* free the targets */
193 	for (unsigned int i = 0; i < t->num_targets; i++) {
194 		struct dm_target *ti = dm_table_get_target(t, i);
195 
196 		if (ti->type->dtr)
197 			ti->type->dtr(ti);
198 
199 		dm_put_target_type(ti->type);
200 	}
201 
202 	kvfree(t->highs);
203 
204 	/* free the device list */
205 	free_devices(&t->devices, t->md);
206 
207 	dm_free_md_mempools(t->mempools);
208 
209 	dm_table_destroy_crypto_profile(t);
210 
211 	kfree(t);
212 }
213 
214 /*
215  * See if we've already got a device in the list.
216  */
217 static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
218 {
219 	struct dm_dev_internal *dd;
220 
221 	list_for_each_entry(dd, l, list)
222 		if (dd->dm_dev->bdev->bd_dev == dev)
223 			return dd;
224 
225 	return NULL;
226 }
227 
228 /*
229  * If possible, this checks whether an area of a destination device is invalid.
230  */
231 static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
232 				  sector_t start, sector_t len, void *data)
233 {
234 	struct queue_limits *limits = data;
235 	struct block_device *bdev = dev->bdev;
236 	sector_t dev_size = bdev_nr_sectors(bdev);
237 	unsigned short logical_block_size_sectors =
238 		limits->logical_block_size >> SECTOR_SHIFT;
239 
240 	if (!dev_size)
241 		return 0;
242 
243 	if ((start >= dev_size) || (start + len > dev_size)) {
244 		DMERR("%s: %pg too small for target: start=%llu, len=%llu, dev_size=%llu",
245 		      dm_device_name(ti->table->md), bdev,
246 		      (unsigned long long)start,
247 		      (unsigned long long)len,
248 		      (unsigned long long)dev_size);
249 		return 1;
250 	}
251 
252 	/*
253 	 * If the target is mapped to zoned block device(s), check
254 	 * that the zones are not partially mapped.
255 	 */
256 	if (bdev_is_zoned(bdev)) {
257 		unsigned int zone_sectors = bdev_zone_sectors(bdev);
258 
259 		if (!bdev_is_zone_aligned(bdev, start)) {
260 			DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg",
261 			      dm_device_name(ti->table->md),
262 			      (unsigned long long)start,
263 			      zone_sectors, bdev);
264 			return 1;
265 		}
266 
267 		/*
268 		 * Note: The last zone of a zoned block device may be smaller
269 		 * than other zones. So for a target mapping the end of a
270 		 * zoned block device with such a zone, len would not be zone
271 		 * aligned. We do not allow such last smaller zone to be part
272 		 * of the mapping here to ensure that mappings with multiple
273 		 * devices do not end up with a smaller zone in the middle of
274 		 * the sector range.
275 		 */
276 		if (!bdev_is_zone_aligned(bdev, len)) {
277 			DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg",
278 			      dm_device_name(ti->table->md),
279 			      (unsigned long long)len,
280 			      zone_sectors, bdev);
281 			return 1;
282 		}
283 	}
284 
285 	if (logical_block_size_sectors <= 1)
286 		return 0;
287 
288 	if (start & (logical_block_size_sectors - 1)) {
289 		DMERR("%s: start=%llu not aligned to h/w logical block size %u of %pg",
290 		      dm_device_name(ti->table->md),
291 		      (unsigned long long)start,
292 		      limits->logical_block_size, bdev);
293 		return 1;
294 	}
295 
296 	if (len & (logical_block_size_sectors - 1)) {
297 		DMERR("%s: len=%llu not aligned to h/w logical block size %u of %pg",
298 		      dm_device_name(ti->table->md),
299 		      (unsigned long long)len,
300 		      limits->logical_block_size, bdev);
301 		return 1;
302 	}
303 
304 	return 0;
305 }
306 
307 /*
308  * This upgrades the mode on an already open dm_dev, being
309  * careful to leave things as they were if we fail to reopen the
310  * device and not to touch the existing bdev field in case
311  * it is accessed concurrently.
312  */
313 static int upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode,
314 			struct mapped_device *md)
315 {
316 	int r;
317 	struct dm_dev *old_dev, *new_dev;
318 
319 	old_dev = dd->dm_dev;
320 
321 	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
322 				dd->dm_dev->mode | new_mode, &new_dev);
323 	if (r)
324 		return r;
325 
326 	dd->dm_dev = new_dev;
327 	dm_put_table_device(md, old_dev);
328 
329 	return 0;
330 }
331 
332 /*
333  * Note: the __ref annotation is because this function can call the __init
334  * marked early_lookup_bdev when called during early boot code from dm-init.c.
335  */
336 int __ref dm_devt_from_path(const char *path, dev_t *dev_p)
337 {
338 	int r;
339 	dev_t dev;
340 	unsigned int major, minor;
341 	char dummy;
342 
343 	if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
344 		/* Extract the major/minor numbers */
345 		dev = MKDEV(major, minor);
346 		if (MAJOR(dev) != major || MINOR(dev) != minor)
347 			return -EOVERFLOW;
348 	} else {
349 		r = lookup_bdev(path, &dev);
350 #ifndef MODULE
351 		if (r && system_state < SYSTEM_RUNNING)
352 			r = early_lookup_bdev(path, &dev);
353 #endif
354 		if (r)
355 			return r;
356 	}
357 	*dev_p = dev;
358 	return 0;
359 }
360 EXPORT_SYMBOL(dm_devt_from_path);
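/*
 * Usage sketch; the device paths below are purely illustrative:
 *
 *	dev_t dev;
 *	int r;
 *
 *	r = dm_devt_from_path("8:16", &dev);	  parsed as major 8, minor 16
 *	r = dm_devt_from_path("/dev/sdb", &dev);  resolved via lookup_bdev()
 *						  (or early_lookup_bdev() during
 *						  early boot)
 */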
361 
362 /*
363  * Add a device to the list, or just increment the usage count if
364  * it's already present.
365  */
366 int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
367 		  struct dm_dev **result)
368 {
369 	int r;
370 	dev_t dev;
371 	struct dm_dev_internal *dd;
372 	struct dm_table *t = ti->table;
373 
374 	BUG_ON(!t);
375 
376 	r = dm_devt_from_path(path, &dev);
377 	if (r)
378 		return r;
379 
380 	if (dev == disk_devt(t->md->disk))
381 		return -EINVAL;
382 
383 	down_write(&t->devices_lock);
384 
385 	dd = find_device(&t->devices, dev);
386 	if (!dd) {
387 		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
388 		if (!dd) {
389 			r = -ENOMEM;
390 			goto unlock_ret_r;
391 		}
392 
393 		r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev);
394 		if (r) {
395 			kfree(dd);
396 			goto unlock_ret_r;
397 		}
398 
399 		refcount_set(&dd->count, 1);
400 		list_add(&dd->list, &t->devices);
401 		goto out;
402 
403 	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
404 		r = upgrade_mode(dd, mode, t->md);
405 		if (r)
406 			goto unlock_ret_r;
407 	}
408 	refcount_inc(&dd->count);
409 out:
410 	up_write(&t->devices_lock);
411 	*result = dd->dm_dev;
412 	return 0;
413 
414 unlock_ret_r:
415 	up_write(&t->devices_lock);
416 	return r;
417 }
418 EXPORT_SYMBOL(dm_get_device);
419 
420 static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
421 				sector_t start, sector_t len, void *data)
422 {
423 	struct queue_limits *limits = data;
424 	struct block_device *bdev = dev->bdev;
425 	struct request_queue *q = bdev_get_queue(bdev);
426 
427 	if (unlikely(!q)) {
428 		DMWARN("%s: Cannot set limits for nonexistent device %pg",
429 		       dm_device_name(ti->table->md), bdev);
430 		return 0;
431 	}
432 
433 	mutex_lock(&q->limits_lock);
434 	/*
435 	 * BLK_FEAT_ATOMIC_WRITES is not inherited from the bottom device in
436 	 * blk_stack_limits(), so do it manually.
437 	 */
438 	limits->features |= (q->limits.features & BLK_FEAT_ATOMIC_WRITES);
439 
440 	if (blk_stack_limits(limits, &q->limits,
441 			get_start_sect(bdev) + start) < 0)
442 		DMWARN("%s: adding target device %pg caused an alignment inconsistency: "
443 		       "physical_block_size=%u, logical_block_size=%u, "
444 		       "alignment_offset=%u, start=%llu",
445 		       dm_device_name(ti->table->md), bdev,
446 		       q->limits.physical_block_size,
447 		       q->limits.logical_block_size,
448 		       q->limits.alignment_offset,
449 		       (unsigned long long) start << SECTOR_SHIFT);
450 
451 	/*
452 	 * Only stack the integrity profile if the target doesn't have native
453 	 * integrity support.
454 	 */
455 	if (!dm_target_has_integrity(ti->type))
456 		queue_limits_stack_integrity_bdev(limits, bdev);
457 	mutex_unlock(&q->limits_lock);
458 	return 0;
459 }
460 
461 /*
462  * Decrement a device's use count and remove it if necessary.
463  */
464 void dm_put_device(struct dm_target *ti, struct dm_dev *d)
465 {
466 	int found = 0;
467 	struct dm_table *t = ti->table;
468 	struct list_head *devices = &t->devices;
469 	struct dm_dev_internal *dd;
470 
471 	down_write(&t->devices_lock);
472 
473 	list_for_each_entry(dd, devices, list) {
474 		if (dd->dm_dev == d) {
475 			found = 1;
476 			break;
477 		}
478 	}
479 	if (!found) {
480 		DMERR("%s: device %s not in table devices list",
481 		      dm_device_name(t->md), d->name);
482 		goto unlock_ret;
483 	}
484 	if (refcount_dec_and_test(&dd->count)) {
485 		dm_put_table_device(t->md, d);
486 		list_del(&dd->list);
487 		kfree(dd);
488 	}
489 
490 unlock_ret:
491 	up_write(&t->devices_lock);
492 }
493 EXPORT_SYMBOL(dm_put_device);
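/*
 * A minimal sketch of the usual dm_get_device()/dm_put_device() pairing in a
 * target's constructor and destructor; example_ctx, example_ctr and
 * example_dtr are hypothetical names (ec->dev is a struct dm_dev * in that
 * hypothetical context), not anything defined in this file:
 *
 *	static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 *	{
 *		struct example_ctx *ec = kzalloc(sizeof(*ec), GFP_KERNEL);
 *		int r;
 *
 *		if (!ec)
 *			return -ENOMEM;
 *		r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 *				  &ec->dev);
 *		if (r) {
 *			kfree(ec);
 *			return r;
 *		}
 *		ti->private = ec;
 *		return 0;
 *	}
 *
 *	static void example_dtr(struct dm_target *ti)
 *	{
 *		struct example_ctx *ec = ti->private;
 *
 *		dm_put_device(ti, ec->dev);
 *		kfree(ec);
 *	}
 */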
494 
495 /*
496  * Checks to see if the target joins onto the end of the table.
497  */
498 static int adjoin(struct dm_table *t, struct dm_target *ti)
499 {
500 	struct dm_target *prev;
501 
502 	if (!t->num_targets)
503 		return !ti->begin;
504 
505 	prev = &t->targets[t->num_targets - 1];
506 	return (ti->begin == (prev->begin + prev->len));
507 }
508 
509 /*
510  * Used to dynamically allocate the arg array.
511  *
512  * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must
513  * process messages even if some device is suspended. These messages have a
514  * small fixed number of arguments.
515  *
516  * On the other hand, dm-switch needs to process bulk data using messages and
517  * excessive use of GFP_NOIO could cause trouble.
518  */
519 static char **realloc_argv(unsigned int *size, char **old_argv)
520 {
521 	char **argv;
522 	unsigned int new_size;
523 	gfp_t gfp;
524 
525 	if (*size) {
526 		new_size = *size * 2;
527 		gfp = GFP_KERNEL;
528 	} else {
529 		new_size = 8;
530 		gfp = GFP_NOIO;
531 	}
532 	argv = kmalloc_array(new_size, sizeof(*argv), gfp);
533 	if (argv) {
534 		if (old_argv)
535 			memcpy(argv, old_argv, *size * sizeof(*argv));
536 		*size = new_size;
537 	}
538 
539 	kfree(old_argv);
540 	return argv;
541 }
542 
543 /*
544  * Destructively splits up the argument list to pass to ctr.
545  */
546 int dm_split_args(int *argc, char ***argvp, char *input)
547 {
548 	char *start, *end = input, *out, **argv = NULL;
549 	unsigned int array_size = 0;
550 
551 	*argc = 0;
552 
553 	if (!input) {
554 		*argvp = NULL;
555 		return 0;
556 	}
557 
558 	argv = realloc_argv(&array_size, argv);
559 	if (!argv)
560 		return -ENOMEM;
561 
562 	while (1) {
563 		/* Skip whitespace */
564 		start = skip_spaces(end);
565 
566 		if (!*start)
567 			break;	/* success, we hit the end */
568 
569 		/* 'out' is used to strip backslash escapes */
570 		end = out = start;
571 		while (*end) {
572 			/* Everything apart from '\0' can be quoted */
573 			if (*end == '\\' && *(end + 1)) {
574 				*out++ = *(end + 1);
575 				end += 2;
576 				continue;
577 			}
578 
579 			if (isspace(*end))
580 				break;	/* end of token */
581 
582 			*out++ = *end++;
583 		}
584 
585 		/* have we already filled the array ? */
586 		if ((*argc + 1) > array_size) {
587 			argv = realloc_argv(&array_size, argv);
588 			if (!argv)
589 				return -ENOMEM;
590 		}
591 
592 		/* we know this is whitespace */
593 		if (*end)
594 			end++;
595 
596 		/* terminate the string and put it in the array */
597 		*out = '\0';
598 		argv[*argc] = start;
599 		(*argc)++;
600 	}
601 
602 	*argvp = argv;
603 	return 0;
604 }
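/*
 * For example, the parameter string "0 /dev/sda 1024" splits into argc == 3
 * with argv[] = { "0", "/dev/sda", "1024" }, while "foo\ bar baz" splits into
 * argc == 2 with argv[] = { "foo bar", "baz" } because the backslash escapes
 * the space.  The returned argv array must be freed with kfree() by the
 * caller; the strings themselves point into the (modified) input buffer.
 */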
605 
606 static void dm_set_stacking_limits(struct queue_limits *limits)
607 {
608 	blk_set_stacking_limits(limits);
609 	limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
610 }
611 
612 /*
613  * Impose necessary and sufficient conditions on a device's table such
614  * that any incoming bio which respects its logical_block_size can be
615  * processed successfully.  If it falls across the boundary between
616  * two or more targets, the size of each piece it gets split into must
617  * be compatible with the logical_block_size of the target processing it.
618  */
619 static int validate_hardware_logical_block_alignment(struct dm_table *t,
620 						     struct queue_limits *limits)
621 {
622 	/*
623 	 * This function uses arithmetic modulo the logical_block_size
624 	 * (in units of 512-byte sectors).
625 	 */
626 	unsigned short device_logical_block_size_sects =
627 		limits->logical_block_size >> SECTOR_SHIFT;
628 
629 	/*
630 	 * Offset of the start of the next table entry, mod logical_block_size.
631 	 */
632 	unsigned short next_target_start = 0;
633 
634 	/*
635 	 * Given an aligned bio that extends beyond the end of a
636 	 * target, how many sectors must the next target handle?
637 	 */
638 	unsigned short remaining = 0;
639 
640 	struct dm_target *ti;
641 	struct queue_limits ti_limits;
642 	unsigned int i;
643 
644 	/*
645 	 * Check each entry in the table in turn.
646 	 */
647 	for (i = 0; i < t->num_targets; i++) {
648 		ti = dm_table_get_target(t, i);
649 
650 		dm_set_stacking_limits(&ti_limits);
651 
652 		/* combine all target devices' limits */
653 		if (ti->type->iterate_devices)
654 			ti->type->iterate_devices(ti, dm_set_device_limits,
655 						  &ti_limits);
656 
657 		/*
658 		 * If the remaining sectors fall entirely within this
659 		 * table entry are they compatible with its logical_block_size?
660 		 */
661 		if (remaining < ti->len &&
662 		    remaining & ((ti_limits.logical_block_size >>
663 				  SECTOR_SHIFT) - 1))
664 			break;	/* Error */
665 
666 		next_target_start =
667 		    (unsigned short) ((next_target_start + ti->len) &
668 				      (device_logical_block_size_sects - 1));
669 		remaining = next_target_start ?
670 		    device_logical_block_size_sects - next_target_start : 0;
671 	}
672 
673 	if (remaining) {
674 		DMERR("%s: table line %u (start sect %llu len %llu) "
675 		      "not aligned to h/w logical block size %u",
676 		      dm_device_name(t->md), i,
677 		      (unsigned long long) ti->begin,
678 		      (unsigned long long) ti->len,
679 		      limits->logical_block_size);
680 		return -EINVAL;
681 	}
682 
683 	return 0;
684 }
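/*
 * To illustrate the check above with a 4096-byte logical block size
 * (device_logical_block_size_sects == 8): a first target of 12 sectors leaves
 * next_target_start == 4 and remaining == 4, so the following target would
 * start 4 sectors into a hardware block.  If that target also has a 4096-byte
 * logical block size, 4 & 7 != 0 and the table is rejected; only boundaries
 * falling on multiples of 8 sectors (or a final remaining of 0) pass.
 * (The sector counts here are chosen purely for illustration.)
 */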
685 
686 int dm_table_add_target(struct dm_table *t, const char *type,
687 			sector_t start, sector_t len, char *params)
688 {
689 	int r = -EINVAL, argc;
690 	char **argv;
691 	struct dm_target *ti;
692 
693 	if (t->singleton) {
694 		DMERR("%s: target type %s must appear alone in table",
695 		      dm_device_name(t->md), t->targets->type->name);
696 		return -EINVAL;
697 	}
698 
699 	BUG_ON(t->num_targets >= t->num_allocated);
700 
701 	ti = t->targets + t->num_targets;
702 	memset(ti, 0, sizeof(*ti));
703 
704 	if (!len) {
705 		DMERR("%s: zero-length target", dm_device_name(t->md));
706 		return -EINVAL;
707 	}
708 	if (start + len < start || start + len > LLONG_MAX >> SECTOR_SHIFT) {
709 		DMERR("%s: too large device", dm_device_name(t->md));
710 		return -EINVAL;
711 	}
712 
713 	ti->type = dm_get_target_type(type);
714 	if (!ti->type) {
715 		DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
716 		return -EINVAL;
717 	}
718 
719 	if (dm_target_needs_singleton(ti->type)) {
720 		if (t->num_targets) {
721 			ti->error = "singleton target type must appear alone in table";
722 			goto bad;
723 		}
724 		t->singleton = true;
725 	}
726 
727 	if (dm_target_always_writeable(ti->type) &&
728 	    !(t->mode & BLK_OPEN_WRITE)) {
729 		ti->error = "target type may not be included in a read-only table";
730 		goto bad;
731 	}
732 
733 	if (t->immutable_target_type) {
734 		if (t->immutable_target_type != ti->type) {
735 			ti->error = "immutable target type cannot be mixed with other target types";
736 			goto bad;
737 		}
738 	} else if (dm_target_is_immutable(ti->type)) {
739 		if (t->num_targets) {
740 			ti->error = "immutable target type cannot be mixed with other target types";
741 			goto bad;
742 		}
743 		t->immutable_target_type = ti->type;
744 	}
745 
746 	ti->table = t;
747 	ti->begin = start;
748 	ti->len = len;
749 	ti->error = "Unknown error";
750 
751 	/*
752 	 * Does this target adjoin the previous one ?
753 	 */
754 	if (!adjoin(t, ti)) {
755 		ti->error = "Gap in table";
756 		goto bad;
757 	}
758 
759 	r = dm_split_args(&argc, &argv, params);
760 	if (r) {
761 		ti->error = "couldn't split parameters";
762 		goto bad;
763 	}
764 
765 	r = ti->type->ctr(ti, argc, argv);
766 	kfree(argv);
767 	if (r)
768 		goto bad;
769 
770 	t->highs[t->num_targets++] = ti->begin + ti->len - 1;
771 
772 	if (!ti->num_discard_bios && ti->discards_supported)
773 		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
774 		       dm_device_name(t->md), type);
775 
776 	if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
777 		static_branch_enable(&swap_bios_enabled);
778 
779 	if (!ti->flush_bypasses_map)
780 		t->flush_bypasses_map = false;
781 
782 	return 0;
783 
784  bad:
785 	DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
786 	dm_put_target_type(ti->type);
787 	return r;
788 }
789 
790 /*
791  * Target argument parsing helpers.
792  */
793 static int validate_next_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
794 			     unsigned int *value, char **error, unsigned int grouped)
795 {
796 	const char *arg_str = dm_shift_arg(arg_set);
797 	char dummy;
798 
799 	if (!arg_str ||
800 	    (sscanf(arg_str, "%u%c", value, &dummy) != 1) ||
801 	    (*value < arg->min) ||
802 	    (*value > arg->max) ||
803 	    (grouped && arg_set->argc < *value)) {
804 		*error = arg->error;
805 		return -EINVAL;
806 	}
807 
808 	return 0;
809 }
810 
811 int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
812 		unsigned int *value, char **error)
813 {
814 	return validate_next_arg(arg, arg_set, value, error, 0);
815 }
816 EXPORT_SYMBOL(dm_read_arg);
817 
818 int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set,
819 		      unsigned int *value, char **error)
820 {
821 	return validate_next_arg(arg, arg_set, value, error, 1);
822 }
823 EXPORT_SYMBOL(dm_read_arg_group);
824 
825 const char *dm_shift_arg(struct dm_arg_set *as)
826 {
827 	char *r;
828 
829 	if (as->argc) {
830 		as->argc--;
831 		r = *as->argv;
832 		as->argv++;
833 		return r;
834 	}
835 
836 	return NULL;
837 }
838 EXPORT_SYMBOL(dm_shift_arg);
839 
840 void dm_consume_args(struct dm_arg_set *as, unsigned int num_args)
841 {
842 	BUG_ON(as->argc < num_args);
843 	as->argc -= num_args;
844 	as->argv += num_args;
845 }
846 EXPORT_SYMBOL(dm_consume_args);
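/*
 * A sketch of how a target constructor might consume a group of optional
 * arguments with these helpers; the bounds, the feature name and the
 * surrounding code are hypothetical, not taken from any target in-tree:
 *
 *	static const struct dm_arg _args[] = {
 *		{ 0, 4, "Invalid number of feature arguments" },
 *	};
 *	struct dm_arg_set as = { .argc = argc, .argv = argv };
 *	unsigned int num_features;
 *	int r;
 *
 *	r = dm_read_arg_group(_args, &as, &num_features, &ti->error);
 *	if (r)
 *		return r;
 *
 *	while (num_features--) {
 *		const char *arg = dm_shift_arg(&as);
 *
 *		if (!strcasecmp(arg, "example_feature"))
 *			...;
 *	}
 */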
847 
848 static bool __table_type_bio_based(enum dm_queue_mode table_type)
849 {
850 	return (table_type == DM_TYPE_BIO_BASED ||
851 		table_type == DM_TYPE_DAX_BIO_BASED);
852 }
853 
854 static bool __table_type_request_based(enum dm_queue_mode table_type)
855 {
856 	return table_type == DM_TYPE_REQUEST_BASED;
857 }
858 
859 void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
860 {
861 	t->type = type;
862 }
863 EXPORT_SYMBOL_GPL(dm_table_set_type);
864 
865 /* validate the dax capability of the target device span */
866 static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
867 			sector_t start, sector_t len, void *data)
868 {
869 	if (dev->dax_dev)
870 		return false;
871 
872 	DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
873 	return true;
874 }
875 
876 /* Check devices support synchronous DAX */
877 static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
878 					      sector_t start, sector_t len, void *data)
879 {
880 	return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
881 }
882 
883 static bool dm_table_supports_dax(struct dm_table *t,
884 				  iterate_devices_callout_fn iterate_fn)
885 {
886 	/* Ensure that all targets support DAX. */
887 	for (unsigned int i = 0; i < t->num_targets; i++) {
888 		struct dm_target *ti = dm_table_get_target(t, i);
889 
890 		if (!ti->type->direct_access)
891 			return false;
892 
893 		if (dm_target_is_wildcard(ti->type) ||
894 		    !ti->type->iterate_devices ||
895 		    ti->type->iterate_devices(ti, iterate_fn, NULL))
896 			return false;
897 	}
898 
899 	return true;
900 }
901 
902 static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
903 				  sector_t start, sector_t len, void *data)
904 {
905 	struct block_device *bdev = dev->bdev;
906 	struct request_queue *q = bdev_get_queue(bdev);
907 
908 	/* request-based cannot stack on partitions! */
909 	if (bdev_is_partition(bdev))
910 		return false;
911 
912 	return queue_is_mq(q);
913 }
914 
915 static int dm_table_determine_type(struct dm_table *t)
916 {
917 	unsigned int bio_based = 0, request_based = 0, hybrid = 0;
918 	struct dm_target *ti;
919 	struct list_head *devices = dm_table_get_devices(t);
920 	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
921 
922 	if (t->type != DM_TYPE_NONE) {
923 		/* target already set the table's type */
924 		if (t->type == DM_TYPE_BIO_BASED) {
925 			/* possibly upgrade to a variant of bio-based */
926 			goto verify_bio_based;
927 		}
928 		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
929 		goto verify_rq_based;
930 	}
931 
932 	for (unsigned int i = 0; i < t->num_targets; i++) {
933 		ti = dm_table_get_target(t, i);
934 		if (dm_target_hybrid(ti))
935 			hybrid = 1;
936 		else if (dm_target_request_based(ti))
937 			request_based = 1;
938 		else
939 			bio_based = 1;
940 
941 		if (bio_based && request_based) {
942 			DMERR("Inconsistent table: different target types can't be mixed up");
943 			return -EINVAL;
944 		}
945 	}
946 
947 	if (hybrid && !bio_based && !request_based) {
948 		/*
949 		 * The targets can work either way.
950 		 * Determine the type from the live device.
951 		 * Default to bio-based if device is new.
952 		 */
953 		if (__table_type_request_based(live_md_type))
954 			request_based = 1;
955 		else
956 			bio_based = 1;
957 	}
958 
959 	if (bio_based) {
960 verify_bio_based:
961 		/* We must use this table as bio-based */
962 		t->type = DM_TYPE_BIO_BASED;
963 		if (dm_table_supports_dax(t, device_not_dax_capable) ||
964 		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
965 			t->type = DM_TYPE_DAX_BIO_BASED;
966 		}
967 		return 0;
968 	}
969 
970 	BUG_ON(!request_based); /* No targets in this table */
971 
972 	t->type = DM_TYPE_REQUEST_BASED;
973 
974 verify_rq_based:
975 	/*
976 	 * Request-based dm supports only tables that have a single target now.
977 	 * To support multiple targets, request splitting support is needed,
978 	 * and that needs lots of changes in the block-layer.
979 	 * (e.g. request completion process for partial completion.)
980 	 */
981 	if (t->num_targets > 1) {
982 		DMERR("request-based DM doesn't support multiple targets");
983 		return -EINVAL;
984 	}
985 
986 	if (list_empty(devices)) {
987 		int srcu_idx;
988 		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
989 
990 		/* inherit live table's type */
991 		if (live_table)
992 			t->type = live_table->type;
993 		dm_put_live_table(t->md, srcu_idx);
994 		return 0;
995 	}
996 
997 	ti = dm_table_get_immutable_target(t);
998 	if (!ti) {
999 		DMERR("table load rejected: immutable target is required");
1000 		return -EINVAL;
1001 	} else if (ti->max_io_len) {
1002 		DMERR("table load rejected: immutable target that splits IO is not supported");
1003 		return -EINVAL;
1004 	}
1005 
1006 	/* Non-request-stackable devices can't be used for request-based dm */
1007 	if (!ti->type->iterate_devices ||
1008 	    !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
1009 		DMERR("table load rejected: including non-request-stackable devices");
1010 		return -EINVAL;
1011 	}
1012 
1013 	return 0;
1014 }
1015 
1016 enum dm_queue_mode dm_table_get_type(struct dm_table *t)
1017 {
1018 	return t->type;
1019 }
1020 
1021 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
1022 {
1023 	return t->immutable_target_type;
1024 }
1025 
1026 struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
1027 {
1028 	/* Immutable target is implicitly a singleton */
1029 	if (t->num_targets > 1 ||
1030 	    !dm_target_is_immutable(t->targets[0].type))
1031 		return NULL;
1032 
1033 	return t->targets;
1034 }
1035 
1036 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
1037 {
1038 	for (unsigned int i = 0; i < t->num_targets; i++) {
1039 		struct dm_target *ti = dm_table_get_target(t, i);
1040 
1041 		if (dm_target_is_wildcard(ti->type))
1042 			return ti;
1043 	}
1044 
1045 	return NULL;
1046 }
1047 
1048 bool dm_table_request_based(struct dm_table *t)
1049 {
1050 	return __table_type_request_based(dm_table_get_type(t));
1051 }
1052 
1053 static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
1054 {
1055 	enum dm_queue_mode type = dm_table_get_type(t);
1056 	unsigned int per_io_data_size = 0, front_pad, io_front_pad;
1057 	unsigned int min_pool_size = 0, pool_size;
1058 	struct dm_md_mempools *pools;
1059 	unsigned int bioset_flags = 0;
1060 
1061 	if (unlikely(type == DM_TYPE_NONE)) {
1062 		DMERR("no table type is set, can't allocate mempools");
1063 		return -EINVAL;
1064 	}
1065 
1066 	pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
1067 	if (!pools)
1068 		return -ENOMEM;
1069 
1070 	if (type == DM_TYPE_REQUEST_BASED) {
1071 		pool_size = dm_get_reserved_rq_based_ios();
1072 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
1073 		goto init_bs;
1074 	}
1075 
1076 	if (md->queue->limits.features & BLK_FEAT_POLL)
1077 		bioset_flags |= BIOSET_PERCPU_CACHE;
1078 
1079 	for (unsigned int i = 0; i < t->num_targets; i++) {
1080 		struct dm_target *ti = dm_table_get_target(t, i);
1081 
1082 		per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
1083 		min_pool_size = max(min_pool_size, ti->num_flush_bios);
1084 	}
1085 	pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
1086 	front_pad = roundup(per_io_data_size,
1087 		__alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
1088 
1089 	io_front_pad = roundup(per_io_data_size,
1090 		__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
1091 	if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
1092 		goto out_free_pools;
1093 init_bs:
1094 	if (bioset_init(&pools->bs, pool_size, front_pad, 0))
1095 		goto out_free_pools;
1096 
1097 	t->mempools = pools;
1098 	return 0;
1099 
1100 out_free_pools:
1101 	dm_free_md_mempools(pools);
1102 	return -ENOMEM;
1103 }
1104 
1105 static int setup_indexes(struct dm_table *t)
1106 {
1107 	int i;
1108 	unsigned int total = 0;
1109 	sector_t *indexes;
1110 
1111 	/* allocate the space for *all* the indexes */
1112 	for (i = t->depth - 2; i >= 0; i--) {
1113 		t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
1114 		total += t->counts[i];
1115 	}
1116 
1117 	indexes = kvcalloc(total, NODE_SIZE, GFP_KERNEL);
1118 	if (!indexes)
1119 		return -ENOMEM;
1120 
1121 	/* set up internal nodes, bottom-up */
1122 	for (i = t->depth - 2; i >= 0; i--) {
1123 		t->index[i] = indexes;
1124 		indexes += (KEYS_PER_NODE * t->counts[i]);
1125 		setup_btree_index(i, t);
1126 	}
1127 
1128 	return 0;
1129 }
1130 
1131 /*
1132  * Builds the btree to index the map.
1133  */
1134 static int dm_table_build_index(struct dm_table *t)
1135 {
1136 	int r = 0;
1137 	unsigned int leaf_nodes;
1138 
1139 	/* how many indexes will the btree have ? */
1140 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
1141 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
1142 
1143 	/* leaf layer has already been set up */
1144 	t->counts[t->depth - 1] = leaf_nodes;
1145 	t->index[t->depth - 1] = t->highs;
1146 
1147 	if (t->depth >= 2)
1148 		r = setup_indexes(t);
1149 
1150 	return r;
1151 }
1152 
1153 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
1154 
1155 struct dm_crypto_profile {
1156 	struct blk_crypto_profile profile;
1157 	struct mapped_device *md;
1158 };
1159 
1160 static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
1161 				     sector_t start, sector_t len, void *data)
1162 {
1163 	const struct blk_crypto_key *key = data;
1164 
1165 	blk_crypto_evict_key(dev->bdev, key);
1166 	return 0;
1167 }
1168 
1169 /*
1170  * When an inline encryption key is evicted from a device-mapper device, evict
1171  * it from all the underlying devices.
1172  */
1173 static int dm_keyslot_evict(struct blk_crypto_profile *profile,
1174 			    const struct blk_crypto_key *key, unsigned int slot)
1175 {
1176 	struct mapped_device *md =
1177 		container_of(profile, struct dm_crypto_profile, profile)->md;
1178 	struct dm_table *t;
1179 	int srcu_idx;
1180 
1181 	t = dm_get_live_table(md, &srcu_idx);
1182 	if (!t)
1183 		goto put_live_table;
1184 
1185 	for (unsigned int i = 0; i < t->num_targets; i++) {
1186 		struct dm_target *ti = dm_table_get_target(t, i);
1187 
1188 		if (!ti->type->iterate_devices)
1189 			continue;
1190 		ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
1191 					  (void *)key);
1192 	}
1193 
1194 put_live_table:
1195 	dm_put_live_table(md, srcu_idx);
1196 	return 0;
1197 }
1198 
1199 enum dm_wrappedkey_op {
1200 	DERIVE_SW_SECRET,
1201 	IMPORT_KEY,
1202 	GENERATE_KEY,
1203 	PREPARE_KEY,
1204 };
1205 
1206 struct dm_wrappedkey_op_args {
1207 	enum dm_wrappedkey_op op;
1208 	int err;
1209 	union {
1210 		struct {
1211 			const u8 *eph_key;
1212 			size_t eph_key_size;
1213 			u8 *sw_secret;
1214 		} derive_sw_secret;
1215 		struct {
1216 			const u8 *raw_key;
1217 			size_t raw_key_size;
1218 			u8 *lt_key;
1219 		} import_key;
1220 		struct {
1221 			u8 *lt_key;
1222 		} generate_key;
1223 		struct {
1224 			const u8 *lt_key;
1225 			size_t lt_key_size;
1226 			u8 *eph_key;
1227 		} prepare_key;
1228 	};
1229 };
1230 
1231 static int dm_wrappedkey_op_callback(struct dm_target *ti, struct dm_dev *dev,
1232 				     sector_t start, sector_t len, void *data)
1233 {
1234 	struct dm_wrappedkey_op_args *args = data;
1235 	struct block_device *bdev = dev->bdev;
1236 	struct blk_crypto_profile *profile =
1237 		bdev_get_queue(bdev)->crypto_profile;
1238 	int err = -EOPNOTSUPP;
1239 
1240 	if (!args->err)
1241 		return 0;
1242 
1243 	switch (args->op) {
1244 	case DERIVE_SW_SECRET:
1245 		err = blk_crypto_derive_sw_secret(
1246 					bdev,
1247 					args->derive_sw_secret.eph_key,
1248 					args->derive_sw_secret.eph_key_size,
1249 					args->derive_sw_secret.sw_secret);
1250 		break;
1251 	case IMPORT_KEY:
1252 		err = blk_crypto_import_key(profile,
1253 					    args->import_key.raw_key,
1254 					    args->import_key.raw_key_size,
1255 					    args->import_key.lt_key);
1256 		break;
1257 	case GENERATE_KEY:
1258 		err = blk_crypto_generate_key(profile,
1259 					      args->generate_key.lt_key);
1260 		break;
1261 	case PREPARE_KEY:
1262 		err = blk_crypto_prepare_key(profile,
1263 					     args->prepare_key.lt_key,
1264 					     args->prepare_key.lt_key_size,
1265 					     args->prepare_key.eph_key);
1266 		break;
1267 	}
1268 	args->err = err;
1269 
1270 	/* Try another device in case this fails. */
1271 	return 0;
1272 }
1273 
1274 static int dm_exec_wrappedkey_op(struct blk_crypto_profile *profile,
1275 				 struct dm_wrappedkey_op_args *args)
1276 {
1277 	struct mapped_device *md =
1278 		container_of(profile, struct dm_crypto_profile, profile)->md;
1279 	struct dm_target *ti;
1280 	struct dm_table *t;
1281 	int srcu_idx;
1282 	int i;
1283 
1284 	args->err = -EOPNOTSUPP;
1285 
1286 	t = dm_get_live_table(md, &srcu_idx);
1287 	if (!t)
1288 		goto out;
1289 
1290 	/*
1291 	 * blk-crypto currently has no support for multiple incompatible
1292 	 * implementations of wrapped inline crypto keys on a single system.
1293 	 * It was already checked earlier that support for wrapped keys was
1294 	 * declared on all underlying devices.  Thus, all the underlying devices
1295 	 * should support all wrapped key operations and they should behave
1296 	 * identically, i.e. work with the same keys.  So, just executing the
1297 	 * operation on the first device on which it works suffices for now.
1298 	 */
1299 	for (i = 0; i < t->num_targets; i++) {
1300 		ti = dm_table_get_target(t, i);
1301 		if (!ti->type->iterate_devices)
1302 			continue;
1303 		ti->type->iterate_devices(ti, dm_wrappedkey_op_callback, args);
1304 		if (!args->err)
1305 			break;
1306 	}
1307 out:
1308 	dm_put_live_table(md, srcu_idx);
1309 	return args->err;
1310 }
1311 
1312 static int dm_derive_sw_secret(struct blk_crypto_profile *profile,
1313 			       const u8 *eph_key, size_t eph_key_size,
1314 			       u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
1315 {
1316 	struct dm_wrappedkey_op_args args = {
1317 		.op = DERIVE_SW_SECRET,
1318 		.derive_sw_secret = {
1319 			.eph_key = eph_key,
1320 			.eph_key_size = eph_key_size,
1321 			.sw_secret = sw_secret,
1322 		},
1323 	};
1324 	return dm_exec_wrappedkey_op(profile, &args);
1325 }
1326 
1327 static int dm_import_key(struct blk_crypto_profile *profile,
1328 			 const u8 *raw_key, size_t raw_key_size,
1329 			 u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
1330 {
1331 	struct dm_wrappedkey_op_args args = {
1332 		.op = IMPORT_KEY,
1333 		.import_key = {
1334 			.raw_key = raw_key,
1335 			.raw_key_size = raw_key_size,
1336 			.lt_key = lt_key,
1337 		},
1338 	};
1339 	return dm_exec_wrappedkey_op(profile, &args);
1340 }
1341 
1342 static int dm_generate_key(struct blk_crypto_profile *profile,
1343 			   u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
1344 {
1345 	struct dm_wrappedkey_op_args args = {
1346 		.op = GENERATE_KEY,
1347 		.generate_key = {
1348 			.lt_key = lt_key,
1349 		},
1350 	};
1351 	return dm_exec_wrappedkey_op(profile, &args);
1352 }
1353 
1354 static int dm_prepare_key(struct blk_crypto_profile *profile,
1355 			  const u8 *lt_key, size_t lt_key_size,
1356 			  u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
1357 {
1358 	struct dm_wrappedkey_op_args args = {
1359 		.op = PREPARE_KEY,
1360 		.prepare_key = {
1361 			.lt_key = lt_key,
1362 			.lt_key_size = lt_key_size,
1363 			.eph_key = eph_key,
1364 		},
1365 	};
1366 	return dm_exec_wrappedkey_op(profile, &args);
1367 }
1368 
1369 static int
1370 device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev,
1371 				     sector_t start, sector_t len, void *data)
1372 {
1373 	struct blk_crypto_profile *parent = data;
1374 	struct blk_crypto_profile *child =
1375 		bdev_get_queue(dev->bdev)->crypto_profile;
1376 
1377 	blk_crypto_intersect_capabilities(parent, child);
1378 	return 0;
1379 }
1380 
1381 void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
1382 {
1383 	struct dm_crypto_profile *dmcp = container_of(profile,
1384 						      struct dm_crypto_profile,
1385 						      profile);
1386 
1387 	if (!profile)
1388 		return;
1389 
1390 	blk_crypto_profile_destroy(profile);
1391 	kfree(dmcp);
1392 }
1393 
1394 static void dm_table_destroy_crypto_profile(struct dm_table *t)
1395 {
1396 	dm_destroy_crypto_profile(t->crypto_profile);
1397 	t->crypto_profile = NULL;
1398 }
1399 
1400 /*
1401  * Constructs and initializes t->crypto_profile with a crypto profile that
1402  * represents the common set of crypto capabilities of the devices described by
1403  * the dm_table.  However, if the constructed crypto profile doesn't support all
1404  * crypto capabilities that are supported by the current mapped_device, it
1405  * returns an error instead, since we don't support removing crypto capabilities
1406  * on table changes.  Finally, if the constructed crypto profile is "empty" (has
1407  * no crypto capabilities at all), it just sets t->crypto_profile to NULL.
1408  */
1409 static int dm_table_construct_crypto_profile(struct dm_table *t)
1410 {
1411 	struct dm_crypto_profile *dmcp;
1412 	struct blk_crypto_profile *profile;
1413 	unsigned int i;
1414 	bool empty_profile = true;
1415 
1416 	dmcp = kmalloc(sizeof(*dmcp), GFP_KERNEL);
1417 	if (!dmcp)
1418 		return -ENOMEM;
1419 	dmcp->md = t->md;
1420 
1421 	profile = &dmcp->profile;
1422 	blk_crypto_profile_init(profile, 0);
1423 	profile->ll_ops.keyslot_evict = dm_keyslot_evict;
1424 	profile->max_dun_bytes_supported = UINT_MAX;
1425 	memset(profile->modes_supported, 0xFF,
1426 	       sizeof(profile->modes_supported));
1427 	profile->key_types_supported = ~0;
1428 
1429 	for (i = 0; i < t->num_targets; i++) {
1430 		struct dm_target *ti = dm_table_get_target(t, i);
1431 
1432 		if (!dm_target_passes_crypto(ti->type)) {
1433 			blk_crypto_intersect_capabilities(profile, NULL);
1434 			break;
1435 		}
1436 		if (!ti->type->iterate_devices)
1437 			continue;
1438 		ti->type->iterate_devices(ti,
1439 					  device_intersect_crypto_capabilities,
1440 					  profile);
1441 	}
1442 
1443 	if (profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED) {
1444 		profile->ll_ops.derive_sw_secret = dm_derive_sw_secret;
1445 		profile->ll_ops.import_key = dm_import_key;
1446 		profile->ll_ops.generate_key = dm_generate_key;
1447 		profile->ll_ops.prepare_key = dm_prepare_key;
1448 	}
1449 
1450 	if (t->md->queue &&
1451 	    !blk_crypto_has_capabilities(profile,
1452 					 t->md->queue->crypto_profile)) {
1453 		DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
1454 		dm_destroy_crypto_profile(profile);
1455 		return -EINVAL;
1456 	}
1457 
1458 	/*
1459 	 * If the new profile doesn't actually support any crypto capabilities,
1460 	 * we may as well represent it with a NULL profile.
1461 	 */
1462 	for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) {
1463 		if (profile->modes_supported[i]) {
1464 			empty_profile = false;
1465 			break;
1466 		}
1467 	}
1468 
1469 	if (empty_profile) {
1470 		dm_destroy_crypto_profile(profile);
1471 		profile = NULL;
1472 	}
1473 
1474 	/*
1475 	 * t->crypto_profile is only set temporarily while the table is being
1476 	 * set up, and it gets set to NULL after the profile has been
1477 	 * transferred to the request_queue.
1478 	 */
1479 	t->crypto_profile = profile;
1480 
1481 	return 0;
1482 }
1483 
1484 static void dm_update_crypto_profile(struct request_queue *q,
1485 				     struct dm_table *t)
1486 {
1487 	if (!t->crypto_profile)
1488 		return;
1489 
1490 	/* Make the crypto profile less restrictive. */
1491 	if (!q->crypto_profile) {
1492 		blk_crypto_register(t->crypto_profile, q);
1493 	} else {
1494 		blk_crypto_update_capabilities(q->crypto_profile,
1495 					       t->crypto_profile);
1496 		dm_destroy_crypto_profile(t->crypto_profile);
1497 	}
1498 	t->crypto_profile = NULL;
1499 }
1500 
1501 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
1502 
1503 static int dm_table_construct_crypto_profile(struct dm_table *t)
1504 {
1505 	return 0;
1506 }
1507 
1508 void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
1509 {
1510 }
1511 
1512 static void dm_table_destroy_crypto_profile(struct dm_table *t)
1513 {
1514 }
1515 
1516 static void dm_update_crypto_profile(struct request_queue *q,
1517 				     struct dm_table *t)
1518 {
1519 }
1520 
1521 #endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
1522 
1523 /*
1524  * Prepares the table for use by building the indices,
1525  * setting the type, and allocating mempools.
1526  */
1527 int dm_table_complete(struct dm_table *t)
1528 {
1529 	int r;
1530 
1531 	r = dm_table_determine_type(t);
1532 	if (r) {
1533 		DMERR("unable to determine table type");
1534 		return r;
1535 	}
1536 
1537 	r = dm_table_build_index(t);
1538 	if (r) {
1539 		DMERR("unable to build btrees");
1540 		return r;
1541 	}
1542 
1543 	r = dm_table_construct_crypto_profile(t);
1544 	if (r) {
1545 		DMERR("could not construct crypto profile.");
1546 		return r;
1547 	}
1548 
1549 	r = dm_table_alloc_md_mempools(t, t->md);
1550 	if (r)
1551 		DMERR("unable to allocate mempools");
1552 
1553 	return r;
1554 }
1555 
1556 static DEFINE_MUTEX(_event_lock);
1557 void dm_table_event_callback(struct dm_table *t,
1558 			     void (*fn)(void *), void *context)
1559 {
1560 	mutex_lock(&_event_lock);
1561 	t->event_fn = fn;
1562 	t->event_context = context;
1563 	mutex_unlock(&_event_lock);
1564 }
1565 
1566 void dm_table_event(struct dm_table *t)
1567 {
1568 	mutex_lock(&_event_lock);
1569 	if (t->event_fn)
1570 		t->event_fn(t->event_context);
1571 	mutex_unlock(&_event_lock);
1572 }
1573 EXPORT_SYMBOL(dm_table_event);
1574 
1575 inline sector_t dm_table_get_size(struct dm_table *t)
1576 {
1577 	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
1578 }
1579 EXPORT_SYMBOL(dm_table_get_size);
1580 
1581 /*
1582  * Search the btree for the correct target.
1583  *
1584  * Caller should check returned pointer for NULL
1585  * to trap I/O beyond end of device.
1586  */
1587 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
1588 {
1589 	unsigned int l, n = 0, k = 0;
1590 	sector_t *node;
1591 
1592 	if (unlikely(sector >= dm_table_get_size(t)))
1593 		return NULL;
1594 
1595 	for (l = 0; l < t->depth; l++) {
1596 		n = get_child(n, k);
1597 		node = get_node(t, l, n);
1598 
1599 		for (k = 0; k < KEYS_PER_NODE; k++)
1600 			if (node[k] >= sector)
1601 				break;
1602 	}
1603 
1604 	return &t->targets[(KEYS_PER_NODE * n) + k];
1605 }
1606 
1607 /*
1608  * type->iterate_devices() should be called when the sanity check needs to
1609  * iterate and check all underlying data devices. iterate_devices() will
1610  * iterate all underlying data devices until it encounters a non-zero return
1611  * code, whether that is returned by the input iterate_devices_callout_fn or
1612  * by iterate_devices() itself internally.
1613  *
1614  * For some target type (e.g. dm-stripe), one call of iterate_devices() may
1615  * iterate multiple underlying devices internally, in which case a non-zero
1616  * return code returned by iterate_devices_callout_fn will stop the iteration
1617  * early.
1618  *
1619  * Cases requiring _any_ underlying device supporting some kind of attribute,
1620  * should use the iteration structure like dm_table_any_dev_attr(), or call
1621  * it directly. @func should handle semantics of positive examples, e.g.
1622  * capable of something.
1623  *
1624  * Cases requiring _all_ underlying devices supporting some kind of attribute,
1625  * should use the iteration structure like dm_table_supports_nowait() or
1626  * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
1627  * uses an @anti_func that handle semantics of counter examples, e.g. not
1628  * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
1629  */
1630 static bool dm_table_any_dev_attr(struct dm_table *t,
1631 				  iterate_devices_callout_fn func, void *data)
1632 {
1633 	for (unsigned int i = 0; i < t->num_targets; i++) {
1634 		struct dm_target *ti = dm_table_get_target(t, i);
1635 
1636 		if (ti->type->iterate_devices &&
1637 		    ti->type->iterate_devices(ti, func, data))
1638 			return true;
1639 	}
1640 
1641 	return false;
1642 }
1643 
1644 static int count_device(struct dm_target *ti, struct dm_dev *dev,
1645 			sector_t start, sector_t len, void *data)
1646 {
1647 	unsigned int *num_devices = data;
1648 
1649 	(*num_devices)++;
1650 
1651 	return 0;
1652 }
1653 
1654 /*
1655  * Check whether a table has no data devices attached using each
1656  * target's iterate_devices method.
1657  * Returns false if the result is unknown because a target doesn't
1658  * support iterate_devices.
1659  */
1660 bool dm_table_has_no_data_devices(struct dm_table *t)
1661 {
1662 	for (unsigned int i = 0; i < t->num_targets; i++) {
1663 		struct dm_target *ti = dm_table_get_target(t, i);
1664 		unsigned int num_devices = 0;
1665 
1666 		if (!ti->type->iterate_devices)
1667 			return false;
1668 
1669 		ti->type->iterate_devices(ti, count_device, &num_devices);
1670 		if (num_devices)
1671 			return false;
1672 	}
1673 
1674 	return true;
1675 }
1676 
1677 bool dm_table_is_wildcard(struct dm_table *t)
1678 {
1679 	for (unsigned int i = 0; i < t->num_targets; i++) {
1680 		struct dm_target *ti = dm_table_get_target(t, i);
1681 
1682 		if (!dm_target_is_wildcard(ti->type))
1683 			return false;
1684 	}
1685 
1686 	return true;
1687 }
1688 
1689 static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev,
1690 			    sector_t start, sector_t len, void *data)
1691 {
1692 	bool *zoned = data;
1693 
1694 	return bdev_is_zoned(dev->bdev) != *zoned;
1695 }
1696 
1697 static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
1698 				 sector_t start, sector_t len, void *data)
1699 {
1700 	return bdev_is_zoned(dev->bdev);
1701 }
1702 
1703 /*
1704  * Check the device zoned model based on the target feature flag. If the target
1705  * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
1706  * also accepted but all devices must have the same zoned model. If the target
1707  * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
1708  * zoned model with all zoned devices having the same zone size.
1709  */
1710 static bool dm_table_supports_zoned(struct dm_table *t, bool zoned)
1711 {
1712 	for (unsigned int i = 0; i < t->num_targets; i++) {
1713 		struct dm_target *ti = dm_table_get_target(t, i);
1714 
1715 		/*
1716 		 * For the wildcard target (dm-error), if we do not have a
1717 		 * backing device, we must always return false. If we have a
1718 		 * backing device, the result must depend on checking zoned
1719 		 * model, like for any other target. So for this, check directly
1720 		 * if the target backing device is zoned as we get "false" when
1721 		 * dm-error was set without a backing device.
1722 		 */
1723 		if (dm_target_is_wildcard(ti->type) &&
1724 		    !ti->type->iterate_devices(ti, device_is_zoned_model, NULL))
1725 			return false;
1726 
1727 		if (dm_target_supports_zoned_hm(ti->type)) {
1728 			if (!ti->type->iterate_devices ||
1729 			    ti->type->iterate_devices(ti, device_not_zoned,
1730 						      &zoned))
1731 				return false;
1732 		} else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
1733 			if (zoned)
1734 				return false;
1735 		}
1736 	}
1737 
1738 	return true;
1739 }
1740 
1741 static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
1742 					   sector_t start, sector_t len, void *data)
1743 {
1744 	unsigned int *zone_sectors = data;
1745 
1746 	if (!bdev_is_zoned(dev->bdev))
1747 		return 0;
1748 	return bdev_zone_sectors(dev->bdev) != *zone_sectors;
1749 }
1750 
1751 /*
1752  * Check consistency of zoned model and zone sectors across all targets. For
1753  * zone sectors, if the destination device is a zoned block device, it shall
1754  * have the specified zone_sectors.
1755  */
1756 static int validate_hardware_zoned(struct dm_table *t, bool zoned,
1757 				   unsigned int zone_sectors)
1758 {
1759 	if (!zoned)
1760 		return 0;
1761 
1762 	if (!dm_table_supports_zoned(t, zoned)) {
1763 		DMERR("%s: zoned model is not consistent across all devices",
1764 		      dm_device_name(t->md));
1765 		return -EINVAL;
1766 	}
1767 
1768 	/* Check zone size validity and compatibility */
1769 	if (!zone_sectors || !is_power_of_2(zone_sectors))
1770 		return -EINVAL;
1771 
1772 	if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
1773 		DMERR("%s: zone sectors is not consistent across all zoned devices",
1774 		      dm_device_name(t->md));
1775 		return -EINVAL;
1776 	}
1777 
1778 	return 0;
1779 }
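
/*
 * Worked example (illustrative only): a zoned drive exposing 256 MiB zones
 * has zone_sectors = 256 MiB / 512 B = 524288 = 2^19, which passes the
 * is_power_of_2() check above. Adding a second zoned device with, say,
 * 393216-sector zones would make device_not_matches_zone_sectors() return
 * true for it, and validate_hardware_zoned() would fail with -EINVAL.
 */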
1780 
1781 /*
1782  * Establish the new table's queue_limits and validate them.
1783  */
1784 int dm_calculate_queue_limits(struct dm_table *t,
1785 			      struct queue_limits *limits)
1786 {
1787 	struct queue_limits ti_limits;
1788 	unsigned int zone_sectors = 0;
1789 	bool zoned = false;
1790 
1791 	dm_set_stacking_limits(limits);
1792 
1793 	t->integrity_supported = true;
1794 	for (unsigned int i = 0; i < t->num_targets; i++) {
1795 		struct dm_target *ti = dm_table_get_target(t, i);
1796 
1797 		if (!dm_target_passes_integrity(ti->type))
1798 			t->integrity_supported = false;
1799 	}
1800 
1801 	for (unsigned int i = 0; i < t->num_targets; i++) {
1802 		struct dm_target *ti = dm_table_get_target(t, i);
1803 
1804 		dm_set_stacking_limits(&ti_limits);
1805 
1806 		if (!ti->type->iterate_devices) {
1807 			/* Set I/O hints portion of queue limits */
1808 			if (ti->type->io_hints)
1809 				ti->type->io_hints(ti, &ti_limits);
1810 			goto combine_limits;
1811 		}
1812 
1813 		/*
1814 		 * Combine queue limits of all the devices this target uses.
1815 		 */
1816 		ti->type->iterate_devices(ti, dm_set_device_limits,
1817 					  &ti_limits);
1818 
1819 		if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) {
1820 			/*
1821 			 * After stacking all limits, validate that all devices
1822 			 * in the table support this zoned model and zone sectors.
1823 			 */
1824 			zoned = (ti_limits.features & BLK_FEAT_ZONED);
1825 			zone_sectors = ti_limits.chunk_sectors;
1826 		}
1827 
1828 		/* Set I/O hints portion of queue limits */
1829 		if (ti->type->io_hints)
1830 			ti->type->io_hints(ti, &ti_limits);
1831 
1832 		/*
1833 		 * Check each device area is consistent with the target's
1834 		 * overall queue limits.
1835 		 */
1836 		if (ti->type->iterate_devices(ti, device_area_is_invalid,
1837 					      &ti_limits))
1838 			return -EINVAL;
1839 
1840 combine_limits:
1841 		/*
1842 		 * Merge this target's queue limits into the overall limits
1843 		 * for the table.
1844 		 */
1845 		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
1846 			DMWARN("%s: adding target device (start sect %llu len %llu) "
1847 			       "caused an alignment inconsistency",
1848 			       dm_device_name(t->md),
1849 			       (unsigned long long) ti->begin,
1850 			       (unsigned long long) ti->len);
1851 
1852 		if (t->integrity_supported ||
1853 		    dm_target_has_integrity(ti->type)) {
1854 			if (!queue_limits_stack_integrity(limits, &ti_limits)) {
1855 				DMWARN("%s: adding target device (start sect %llu len %llu) "
1856 				       "disabled integrity support due to incompatibility",
1857 				       dm_device_name(t->md),
1858 				       (unsigned long long) ti->begin,
1859 				       (unsigned long long) ti->len);
1860 				t->integrity_supported = false;
1861 			}
1862 		}
1863 	}
1864 
1865 	/*
1866 	 * Verify that the zoned model and zone sectors, as determined before
1867 	 * any .io_hints override, are the same across all devices in the table.
1868 	 * - this is especially relevant if .io_hints is emulating a drive-managed
1869 	 *   zoned model on host-managed zoned block devices.
1870 	 * BUT...
1871 	 */
1872 	if (limits->features & BLK_FEAT_ZONED) {
1873 		/*
1874 		 * ...IF the above limits stacking determined a zoned model
1875 		 * validate that all of the table's devices conform to it.
1876 		 */
1877 		zoned = limits->features & BLK_FEAT_ZONED;
1878 		zone_sectors = limits->chunk_sectors;
1879 	}
1880 	if (validate_hardware_zoned(t, zoned, zone_sectors))
1881 		return -EINVAL;
1882 
1883 	return validate_hardware_logical_block_alignment(t, limits);
1884 }
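
/*
 * Minimal usage sketch (illustrative only, error handling trimmed): the
 * limits computed here are normally fed straight into
 * dm_table_set_restrictions() when the table is bound to its request_queue
 * (the real call sites live in dm.c):
 *
 *	struct queue_limits limits;
 *	int r;
 *
 *	r = dm_calculate_queue_limits(t, &limits);
 *	if (!r)
 *		r = dm_table_set_restrictions(t, q, &limits);
 */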
1885 
1886 /*
1887  * Check if a target requires flush support even if none of the underlying
1888  * devices need it (e.g. to persist target-specific metadata).
1889  */
1890 static bool dm_table_supports_flush(struct dm_table *t)
1891 {
1892 	for (unsigned int i = 0; i < t->num_targets; i++) {
1893 		struct dm_target *ti = dm_table_get_target(t, i);
1894 
1895 		if (ti->num_flush_bios && ti->flush_supported)
1896 			return true;
1897 	}
1898 
1899 	return false;
1900 }
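
/*
 * Illustrative sketch (not part of the original source): a target that
 * maintains its own metadata typically requests flush support from its
 * .ctr even when the underlying data devices would not need it:
 *
 *	ti->num_flush_bios = 1;
 *	ti->flush_supported = true;
 *
 * With that, dm_table_supports_flush() returns true and
 * dm_table_set_restrictions() below enables BLK_FEAT_WRITE_CACHE and
 * BLK_FEAT_FUA for the mapped device.
 */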
1901 
1902 static int device_dax_write_cache_enabled(struct dm_target *ti,
1903 					  struct dm_dev *dev, sector_t start,
1904 					  sector_t len, void *data)
1905 {
1906 	struct dax_device *dax_dev = dev->dax_dev;
1907 
1908 	if (!dax_dev)
1909 		return false;
1910 
1911 	if (dax_write_cache_enabled(dax_dev))
1912 		return true;
1913 	return false;
1914 }
1915 
1916 static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
1917 					   sector_t start, sector_t len, void *data)
1918 {
1919 	struct request_queue *q = bdev_get_queue(dev->bdev);
1920 	int b;
1921 
1922 	mutex_lock(&q->limits_lock);
1923 	b = !q->limits.max_write_zeroes_sectors;
1924 	mutex_unlock(&q->limits_lock);
1925 	return b;
1926 }
1927 
1928 static bool dm_table_supports_write_zeroes(struct dm_table *t)
1929 {
1930 	for (unsigned int i = 0; i < t->num_targets; i++) {
1931 		struct dm_target *ti = dm_table_get_target(t, i);
1932 
1933 		if (!ti->num_write_zeroes_bios)
1934 			return false;
1935 
1936 		if (!ti->type->iterate_devices ||
1937 		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
1938 			return false;
1939 	}
1940 
1941 	return true;
1942 }
1943 
1944 static bool dm_table_supports_nowait(struct dm_table *t)
1945 {
1946 	for (unsigned int i = 0; i < t->num_targets; i++) {
1947 		struct dm_target *ti = dm_table_get_target(t, i);
1948 
1949 		if (!dm_target_supports_nowait(ti->type))
1950 			return false;
1951 	}
1952 
1953 	return true;
1954 }
1955 
1956 static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
1957 				      sector_t start, sector_t len, void *data)
1958 {
1959 	return !bdev_max_discard_sectors(dev->bdev);
1960 }
1961 
1962 static bool dm_table_supports_discards(struct dm_table *t)
1963 {
1964 	for (unsigned int i = 0; i < t->num_targets; i++) {
1965 		struct dm_target *ti = dm_table_get_target(t, i);
1966 
1967 		if (!ti->num_discard_bios)
1968 			return false;
1969 
1970 		/*
1971 		 * Either the target provides discard support (as implied by setting
1972 		 * 'discards_supported') or it relies on _all_ data devices having
1973 		 * discard support.
1974 		 */
1975 		if (!ti->discards_supported &&
1976 		    (!ti->type->iterate_devices ||
1977 		     ti->type->iterate_devices(ti, device_not_discard_capable, NULL)))
1978 			return false;
1979 	}
1980 
1981 	return true;
1982 }
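
/*
 * Illustrative sketch (not part of the original source): a target enables
 * discards from its .ctr; setting discards_supported in addition tells the
 * check above not to require discard support from every data device,
 * because the target handles (or filters) discards itself:
 *
 *	ti->num_discard_bios = 1;
 *	ti->discards_supported = true;	// optional, see comment above
 */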
1983 
1984 static int device_not_secure_erase_capable(struct dm_target *ti,
1985 					   struct dm_dev *dev, sector_t start,
1986 					   sector_t len, void *data)
1987 {
1988 	return !bdev_max_secure_erase_sectors(dev->bdev);
1989 }
1990 
1991 static bool dm_table_supports_secure_erase(struct dm_table *t)
1992 {
1993 	for (unsigned int i = 0; i < t->num_targets; i++) {
1994 		struct dm_target *ti = dm_table_get_target(t, i);
1995 
1996 		if (!ti->num_secure_erase_bios)
1997 			return false;
1998 
1999 		if (!ti->type->iterate_devices ||
2000 		    ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
2001 			return false;
2002 	}
2003 
2004 	return true;
2005 }
2006 
2007 static int device_not_atomic_write_capable(struct dm_target *ti,
2008 			struct dm_dev *dev, sector_t start,
2009 			sector_t len, void *data)
2010 {
2011 	return !bdev_can_atomic_write(dev->bdev);
2012 }
2013 
2014 static bool dm_table_supports_atomic_writes(struct dm_table *t)
2015 {
2016 	for (unsigned int i = 0; i < t->num_targets; i++) {
2017 		struct dm_target *ti = dm_table_get_target(t, i);
2018 
2019 		if (!dm_target_supports_atomic_writes(ti->type))
2020 			return false;
2021 
2022 		if (!ti->type->iterate_devices)
2023 			return false;
2024 
2025 		if (ti->type->iterate_devices(ti,
2026 			device_not_atomic_write_capable, NULL)) {
2027 			return false;
2028 		}
2029 	}
2030 	return true;
2031 }
2032 
2033 bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
2034 				   sector_t new_size)
2035 {
2036 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) &&
2037 	    old_size != new_size) {
2038 		DMWARN("%s: device has zone write plug resources. "
2039 		       "Cannot change size",
2040 		       dm_device_name(t->md));
2041 		return false;
2042 	}
2043 	return true;
2044 }
2045 
2046 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
2047 			      struct queue_limits *limits)
2048 {
2049 	int r;
2050 	struct queue_limits old_limits;
2051 
2052 	if (!dm_table_supports_nowait(t))
2053 		limits->features &= ~BLK_FEAT_NOWAIT;
2054 
2055 	/*
2056 	 * The current polling implementation does not support request-based
2057 	 * stacking.
2058 	 */
2059 	if (!__table_type_bio_based(t->type))
2060 		limits->features &= ~BLK_FEAT_POLL;
2061 
2062 	if (!dm_table_supports_discards(t)) {
2063 		limits->max_hw_discard_sectors = 0;
2064 		limits->discard_granularity = 0;
2065 		limits->discard_alignment = 0;
2066 	}
2067 
2068 	if (!dm_table_supports_write_zeroes(t))
2069 		limits->max_write_zeroes_sectors = 0;
2070 
2071 	if (!dm_table_supports_secure_erase(t))
2072 		limits->max_secure_erase_sectors = 0;
2073 
2074 	if (dm_table_supports_flush(t))
2075 		limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
2076 
2077 	if (dm_table_supports_dax(t, device_not_dax_capable))
2078 		limits->features |= BLK_FEAT_DAX;
2079 	else
2080 		limits->features &= ~BLK_FEAT_DAX;
2081 
2082 	/* For a zoned table, set up the zone-related queue attributes. */
2083 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
2084 		if (limits->features & BLK_FEAT_ZONED) {
2085 			r = dm_set_zones_restrictions(t, q, limits);
2086 			if (r)
2087 				return r;
2088 		} else if (dm_has_zone_plugs(t->md)) {
2089 			DMWARN("%s: device has zone write plug resources. "
2090 			       "Cannot switch to non-zoned table.",
2091 			       dm_device_name(t->md));
2092 			return -EINVAL;
2093 		}
2094 	}
2095 
2096 	if (dm_table_supports_atomic_writes(t))
2097 		limits->features |= BLK_FEAT_ATOMIC_WRITES;
2098 
2099 	old_limits = queue_limits_start_update(q);
2100 	r = queue_limits_commit_update(q, limits);
2101 	if (r)
2102 		return r;
2103 
2104 	/*
2105 	 * Now that the limits are set, check the zones mapped by the table
2106 	 * and set up the resources for zone append emulation if necessary.
2107 	 */
2108 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
2109 	    (limits->features & BLK_FEAT_ZONED)) {
2110 		r = dm_revalidate_zones(t, q);
2111 		if (r) {
2112 			queue_limits_set(q, &old_limits);
2113 			return r;
2114 		}
2115 	}
2116 
2117 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
2118 		dm_finalize_zone_settings(t, limits);
2119 
2120 	if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
2121 		set_dax_synchronous(t->md->dax_dev);
2122 
2123 	if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
2124 		dax_write_cache(t->md->dax_dev, true);
2125 
2126 	dm_update_crypto_profile(q, t);
2127 	return 0;
2128 }
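
/*
 * Distilled sketch of the limits-update pattern used above (illustrative
 * only): the previous limits are snapshotted before the commit so that a
 * failure in a later step (here: zone revalidation) can be rolled back.
 *
 *	old_limits = queue_limits_start_update(q);
 *	r = queue_limits_commit_update(q, limits);
 *	...
 *	if (later_step_failed)			// hypothetical condition
 *		queue_limits_set(q, &old_limits);
 */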
2129 
2130 struct list_head *dm_table_get_devices(struct dm_table *t)
2131 {
2132 	return &t->devices;
2133 }
2134 
2135 blk_mode_t dm_table_get_mode(struct dm_table *t)
2136 {
2137 	return t->mode;
2138 }
2139 EXPORT_SYMBOL(dm_table_get_mode);
2140 
2141 enum suspend_mode {
2142 	PRESUSPEND,
2143 	PRESUSPEND_UNDO,
2144 	POSTSUSPEND,
2145 };
2146 
2147 static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
2148 {
2149 	lockdep_assert_held(&t->md->suspend_lock);
2150 
2151 	for (unsigned int i = 0; i < t->num_targets; i++) {
2152 		struct dm_target *ti = dm_table_get_target(t, i);
2153 
2154 		switch (mode) {
2155 		case PRESUSPEND:
2156 			if (ti->type->presuspend)
2157 				ti->type->presuspend(ti);
2158 			break;
2159 		case PRESUSPEND_UNDO:
2160 			if (ti->type->presuspend_undo)
2161 				ti->type->presuspend_undo(ti);
2162 			break;
2163 		case POSTSUSPEND:
2164 			if (ti->type->postsuspend)
2165 				ti->type->postsuspend(ti);
2166 			break;
2167 		}
2168 	}
2169 }
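
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * target wires up the optional suspend hooks invoked above through its
 * target_type; any hook left NULL is simply skipped by suspend_targets():
 *
 *	static struct target_type example_target = {
 *		.name            = "example",	// hypothetical name
 *		.presuspend      = example_presuspend,
 *		.presuspend_undo = example_presuspend_undo,
 *		.postsuspend     = example_postsuspend,
 *		.module          = THIS_MODULE,
 *	};
 */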
2170 
2171 void dm_table_presuspend_targets(struct dm_table *t)
2172 {
2173 	if (!t)
2174 		return;
2175 
2176 	suspend_targets(t, PRESUSPEND);
2177 }
2178 
2179 void dm_table_presuspend_undo_targets(struct dm_table *t)
2180 {
2181 	if (!t)
2182 		return;
2183 
2184 	suspend_targets(t, PRESUSPEND_UNDO);
2185 }
2186 
2187 void dm_table_postsuspend_targets(struct dm_table *t)
2188 {
2189 	if (!t)
2190 		return;
2191 
2192 	suspend_targets(t, POSTSUSPEND);
2193 }
2194 
2195 int dm_table_resume_targets(struct dm_table *t)
2196 {
2197 	unsigned int i;
2198 	int r = 0;
2199 
2200 	lockdep_assert_held(&t->md->suspend_lock);
2201 
2202 	for (i = 0; i < t->num_targets; i++) {
2203 		struct dm_target *ti = dm_table_get_target(t, i);
2204 
2205 		if (!ti->type->preresume)
2206 			continue;
2207 
2208 		r = ti->type->preresume(ti);
2209 		if (r) {
2210 			DMERR("%s: %s: preresume failed, error = %d",
2211 			      dm_device_name(t->md), ti->type->name, r);
2212 			return r;
2213 		}
2214 	}
2215 
2216 	for (i = 0; i < t->num_targets; i++) {
2217 		struct dm_target *ti = dm_table_get_target(t, i);
2218 
2219 		if (ti->type->resume)
2220 			ti->type->resume(ti);
2221 	}
2222 
2223 	return 0;
2224 }
2225 
2226 struct mapped_device *dm_table_get_md(struct dm_table *t)
2227 {
2228 	return t->md;
2229 }
2230 EXPORT_SYMBOL(dm_table_get_md);
2231 
2232 const char *dm_table_device_name(struct dm_table *t)
2233 {
2234 	return dm_device_name(t->md);
2235 }
2236 EXPORT_SYMBOL_GPL(dm_table_device_name);
2237 
2238 void dm_table_run_md_queue_async(struct dm_table *t)
2239 {
2240 	if (!dm_table_request_based(t))
2241 		return;
2242 
2243 	if (t->md->queue)
2244 		blk_mq_run_hw_queues(t->md->queue, true);
2245 }
2246 EXPORT_SYMBOL(dm_table_run_md_queue_async);
2247 
2248