xref: /linux/drivers/md/dm-ioctl.c (revision 4b99990cdf9560e8a071640baf19f312e6ae02f4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
4  * Copyright (C) 2004 - 2006 Red Hat, Inc. All rights reserved.
5  *
6  * This file is released under the GPL.
7  */
8 
9 #include "dm-core.h"
10 #include "dm-ima.h"
11 #include <linux/module.h>
12 #include <linux/vmalloc.h>
13 #include <linux/miscdevice.h>
14 #include <linux/sched/mm.h>
15 #include <linux/init.h>
16 #include <linux/wait.h>
17 #include <linux/slab.h>
18 #include <linux/rbtree.h>
19 #include <linux/dm-ioctl.h>
20 #include <linux/hdreg.h>
21 #include <linux/compat.h>
22 #include <linux/nospec.h>
23 
24 #include <linux/uaccess.h>
25 #include <linux/ima.h>
26 
27 #define DM_MSG_PREFIX "ioctl"
28 #define DM_DRIVER_EMAIL "dm-devel@lists.linux.dev"
29 
/*
 * Event-polling state.
 * NOTE(review): presumably one instance per open control file - confirm
 * against the code that allocates it.
 */
struct dm_file {
	/*
	 * Threshold for poll: it waits until the global event number
	 * becomes greater than this value.
	 */
	volatile unsigned int global_event_nr;
};
37 
38 /*
39  *---------------------------------------------------------------
40  * The ioctl interface needs to be able to look up devices by
41  * name or uuid.
42  *---------------------------------------------------------------
43  */
44 struct hash_cell {
45 	struct rb_node name_node;
46 	struct rb_node uuid_node;
47 	bool name_set;
48 	bool uuid_set;
49 
50 	char *name;
51 	char *uuid;
52 	struct mapped_device *md;
53 	struct dm_table *new_map;
54 };
55 
/* Cursor handed to list_version_get_info() while filling the output. */
struct vers_iter {
	size_t param_size;
	/* Entry that will be written next. */
	struct dm_target_versions *vers;
	/* Most recently written entry, or NULL before the first one. */
	struct dm_target_versions *old_vers;
	/* First byte past the space reserved for the entries. */
	char *end;
	/* Flags to OR into the ioctl's flags after the walk. */
	uint32_t flags;
};
62 
63 
64 static struct rb_root name_rb_tree = RB_ROOT;
65 static struct rb_root uuid_rb_tree = RB_ROOT;
66 
67 #define DM_REMOVE_KEEP_OPEN_DEVICES	1
68 #define DM_REMOVE_MARK_DEFERRED		2
69 #define DM_REMOVE_ONLY_DEFERRED		4
70 #define DM_REMOVE_INTERRUPTIBLE		8
71 static int dm_hash_remove_all(unsigned flags);
72 
/*
 * Guards access to both lookup trees (name_rb_tree and uuid_rb_tree).
 */
static DECLARE_RWSEM(_hash_lock);

/*
 * Protects use of mdptr to obtain the hash cell's name and uuid from a
 * mapped device.
 */
static DEFINE_MUTEX(dm_hash_cells_mutex);
82 
/* Tear-down hook: calls dm_hash_remove_all() with no flags set. */
static void dm_hash_exit(void)
{
	(void) dm_hash_remove_all(0);
}
87 
88 /*
89  *---------------------------------------------------------------
90  * Code for looking up a device by name
91  *---------------------------------------------------------------
92  */
93 static struct hash_cell *__get_name_cell(const char *str)
94 {
95 	struct rb_node *n = name_rb_tree.rb_node;
96 
97 	while (n) {
98 		struct hash_cell *hc = container_of(n, struct hash_cell, name_node);
99 		int c;
100 
101 		c = strcmp(hc->name, str);
102 		if (!c) {
103 			dm_get(hc->md);
104 			return hc;
105 		}
106 		n = c >= 0 ? n->rb_left : n->rb_right;
107 	}
108 
109 	return NULL;
110 }
111 
112 static struct hash_cell *__get_uuid_cell(const char *str)
113 {
114 	struct rb_node *n = uuid_rb_tree.rb_node;
115 
116 	while (n) {
117 		struct hash_cell *hc = container_of(n, struct hash_cell, uuid_node);
118 		int c;
119 
120 		c = strcmp(hc->uuid, str);
121 		if (!c) {
122 			dm_get(hc->md);
123 			return hc;
124 		}
125 		n = c >= 0 ? n->rb_left : n->rb_right;
126 	}
127 
128 	return NULL;
129 }
130 
131 static void __unlink_name(struct hash_cell *hc)
132 {
133 	if (hc->name_set) {
134 		hc->name_set = false;
135 		rb_erase(&hc->name_node, &name_rb_tree);
136 	}
137 }
138 
139 static void __unlink_uuid(struct hash_cell *hc)
140 {
141 	if (hc->uuid_set) {
142 		hc->uuid_set = false;
143 		rb_erase(&hc->uuid_node, &uuid_rb_tree);
144 	}
145 }
146 
147 static void __link_name(struct hash_cell *new_hc)
148 {
149 	struct rb_node **n, *parent;
150 
151 	__unlink_name(new_hc);
152 
153 	new_hc->name_set = true;
154 
155 	n = &name_rb_tree.rb_node;
156 	parent = NULL;
157 
158 	while (*n) {
159 		struct hash_cell *hc = container_of(*n, struct hash_cell, name_node);
160 		int c;
161 
162 		c = strcmp(hc->name, new_hc->name);
163 		BUG_ON(!c);
164 		parent = *n;
165 		n = c >= 0 ? &hc->name_node.rb_left : &hc->name_node.rb_right;
166 	}
167 
168 	rb_link_node(&new_hc->name_node, parent, n);
169 	rb_insert_color(&new_hc->name_node, &name_rb_tree);
170 }
171 
172 static void __link_uuid(struct hash_cell *new_hc)
173 {
174 	struct rb_node **n, *parent;
175 
176 	__unlink_uuid(new_hc);
177 
178 	new_hc->uuid_set = true;
179 
180 	n = &uuid_rb_tree.rb_node;
181 	parent = NULL;
182 
183 	while (*n) {
184 		struct hash_cell *hc = container_of(*n, struct hash_cell, uuid_node);
185 		int c;
186 
187 		c = strcmp(hc->uuid, new_hc->uuid);
188 		BUG_ON(!c);
189 		parent = *n;
190 		n = c > 0 ? &hc->uuid_node.rb_left : &hc->uuid_node.rb_right;
191 	}
192 
193 	rb_link_node(&new_hc->uuid_node, parent, n);
194 	rb_insert_color(&new_hc->uuid_node, &uuid_rb_tree);
195 }
196 
197 static struct hash_cell *__get_dev_cell(uint64_t dev)
198 {
199 	struct mapped_device *md;
200 	struct hash_cell *hc;
201 
202 	md = dm_get_md(huge_decode_dev(dev));
203 	if (!md)
204 		return NULL;
205 
206 	hc = dm_get_mdptr(md);
207 	if (!hc) {
208 		dm_put(md);
209 		return NULL;
210 	}
211 
212 	return hc;
213 }
214 
215 /*
216  *---------------------------------------------------------------
217  * Inserting, removing and renaming a device.
218  *---------------------------------------------------------------
219  */
220 static struct hash_cell *alloc_cell(const char *name, const char *uuid,
221 				    struct mapped_device *md)
222 {
223 	struct hash_cell *hc;
224 
225 	hc = kmalloc_obj(*hc);
226 	if (!hc)
227 		return NULL;
228 
229 	hc->name = kstrdup(name, GFP_KERNEL);
230 	if (!hc->name) {
231 		kfree(hc);
232 		return NULL;
233 	}
234 
235 	if (!uuid)
236 		hc->uuid = NULL;
237 
238 	else {
239 		hc->uuid = kstrdup(uuid, GFP_KERNEL);
240 		if (!hc->uuid) {
241 			kfree(hc->name);
242 			kfree(hc);
243 			return NULL;
244 		}
245 	}
246 
247 	hc->name_set = hc->uuid_set = false;
248 	hc->md = md;
249 	hc->new_map = NULL;
250 	return hc;
251 }
252 
253 static void free_cell(struct hash_cell *hc)
254 {
255 	if (hc) {
256 		kfree(hc->name);
257 		kfree(hc->uuid);
258 		kfree(hc);
259 	}
260 }
261 
262 #ifdef CONFIG_IMA
263 
264 /*
265  * Called while holding to _hash_lock, to guarantee the ordering of the
266  * following dm_ima_measure_on_* functions, which should be called
267  * right after dropping the _hash_lock
268  */
269 static unsigned int dm_ima_init_context(struct hash_cell *hc,
270 					struct dm_ima_context *context,
271 				        bool need_idx)
272 {
273 	lockdep_assert_held(&_hash_lock);
274 
275 	if (unlikely(!context))
276 		return need_idx ? hc->md->ima.update_idx++ : 0;
277 
278 	context->update_idx = hc->md->ima.update_idx++;
279 	strcpy(context->dev_name, hc->name);
280 	strcpy(context->dev_uuid, hc->uuid ? : "");
281 
282 	return context->update_idx;
283 }
284 
285 /*
286  * Called by do_resume() to guarantee correct ordering, since do_resume()
287  * does not grab the _hash_lock when the table is not getting swapped or
288  * when actually swapping the active table
289  */
290 static bool dm_ima_need_measure(struct mapped_device *md,
291 				struct dm_table *table,
292 				struct dm_ima_context *context)
293 {
294 	int srcu_idx;
295 	struct hash_cell *hc;
296 	bool need_measure = false;
297 
298 	if (unlikely(!context))
299 		return false;
300 
301 	down_write(&_hash_lock);
302 	/* Check if the device has been removed */
303 	hc = dm_get_mdptr(md);
304 	if (hc) {
305 		/*
306 		 * If we have a table, we need to make sure that it's the
307 		 * active table. Otherwise we raced with another process
308 		 * setting the active table and it will do the measurement
309 		 */
310 		if (!table || dm_get_live_table(md, &srcu_idx) == table) {
311 			dm_ima_init_context(hc, context, false);
312 			need_measure = true;
313 		}
314 		if (table)
315 			dm_put_live_table(md, srcu_idx);
316 	}
317 	up_write(&_hash_lock);
318 
319 	return need_measure;
320 }
321 #else
/* !CONFIG_IMA stub: no context is filled in and the index is always 0. */
static inline unsigned int dm_ima_init_context(struct hash_cell *hc,
					       struct dm_ima_context *context,
					       bool need_idx)
{
	return 0;
}
/* !CONFIG_IMA stub: a measurement is never needed. */
static inline bool dm_ima_need_measure(struct mapped_device *md,
				       struct dm_table *table,
				       struct dm_ima_context *context)
{
	const bool need_measure = false;

	return need_measure;
}
334 #endif
335 
336 /*
337  * The kdev_t and uuid of a device can never change once it is
338  * initially inserted.
339  */
340 static int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
341 {
342 	struct hash_cell *cell, *hc;
343 
344 	/*
345 	 * Allocate the new cells.
346 	 */
347 	cell = alloc_cell(name, uuid, md);
348 	if (!cell)
349 		return -ENOMEM;
350 
351 	/*
352 	 * Insert the cell into both hash tables.
353 	 */
354 	down_write(&_hash_lock);
355 	hc = __get_name_cell(name);
356 	if (hc) {
357 		dm_put(hc->md);
358 		goto bad;
359 	}
360 
361 	__link_name(cell);
362 
363 	if (uuid) {
364 		hc = __get_uuid_cell(uuid);
365 		if (hc) {
366 			__unlink_name(cell);
367 			dm_put(hc->md);
368 			goto bad;
369 		}
370 		__link_uuid(cell);
371 	}
372 	dm_get(md);
373 	mutex_lock(&dm_hash_cells_mutex);
374 	dm_set_mdptr(md, cell);
375 	mutex_unlock(&dm_hash_cells_mutex);
376 	up_write(&_hash_lock);
377 
378 	return 0;
379 
380  bad:
381 	up_write(&_hash_lock);
382 	free_cell(cell);
383 	return -EBUSY;
384 }
385 
386 static struct dm_table *__hash_remove(struct hash_cell *hc)
387 {
388 	struct dm_table *table;
389 	int srcu_idx;
390 
391 	lockdep_assert_held(&_hash_lock);
392 
393 	/* remove from the dev trees */
394 	__unlink_name(hc);
395 	__unlink_uuid(hc);
396 	mutex_lock(&dm_hash_cells_mutex);
397 	dm_set_mdptr(hc->md, NULL);
398 	mutex_unlock(&dm_hash_cells_mutex);
399 
400 	table = dm_get_live_table(hc->md, &srcu_idx);
401 	if (table)
402 		dm_table_event(table);
403 	dm_put_live_table(hc->md, srcu_idx);
404 
405 	table = NULL;
406 	if (hc->new_map)
407 		table = hc->new_map;
408 	dm_put(hc->md);
409 	free_cell(hc);
410 
411 	return table;
412 }
413 
414 static int dm_hash_remove_all(unsigned flags)
415 {
416 	int dev_skipped;
417 	struct rb_node *n;
418 	struct hash_cell *hc;
419 	struct mapped_device *md;
420 	struct dm_table *t;
421 	struct dm_ima_context *ima_context = NULL;
422 	unsigned int ima_idx;
423 
424 	dm_ima_alloc_context(&ima_context, true);
425 retry:
426 	dev_skipped = 0;
427 
428 	down_write(&_hash_lock);
429 
430 	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
431 		if (flags & DM_REMOVE_INTERRUPTIBLE && fatal_signal_pending(current)) {
432 			up_write(&_hash_lock);
433 			dm_ima_free_context(ima_context);
434 			return -EINTR;
435 		}
436 
437 		hc = container_of(n, struct hash_cell, name_node);
438 		md = hc->md;
439 		dm_get(md);
440 
441 		if (flags & DM_REMOVE_KEEP_OPEN_DEVICES &&
442 		    dm_lock_for_deletion(md, !!(flags & DM_REMOVE_MARK_DEFERRED), !!(flags & DM_REMOVE_ONLY_DEFERRED))) {
443 			dm_put(md);
444 			dev_skipped++;
445 			continue;
446 		}
447 
448 		ima_idx = dm_ima_init_context(hc, ima_context, true);
449 		t = __hash_remove(hc);
450 
451 		up_write(&_hash_lock);
452 
453 		if (t) {
454 			dm_sync_table(md);
455 			dm_table_destroy(t);
456 		}
457 		dm_ima_measure_on_device_remove(md, true, ima_context, ima_idx);
458 		dm_put(md);
459 		if (likely(flags & DM_REMOVE_KEEP_OPEN_DEVICES))
460 			dm_destroy(md);
461 		else
462 			dm_destroy_immediate(md);
463 
464 		/*
465 		 * Some mapped devices may be using other mapped
466 		 * devices, so repeat until we make no further
467 		 * progress.  If a new mapped device is created
468 		 * here it will also get removed.
469 		 */
470 		goto retry;
471 	}
472 
473 	up_write(&_hash_lock);
474 
475 	if (dev_skipped && !(flags & DM_REMOVE_ONLY_DEFERRED))
476 		DMWARN("remove_all left %d open device(s)", dev_skipped);
477 
478 	dm_ima_free_context(ima_context);
479 	return 0;
480 }
481 
482 /*
483  * Set the uuid of a hash_cell that isn't already set.
484  */
485 static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid)
486 {
487 	mutex_lock(&dm_hash_cells_mutex);
488 	hc->uuid = new_uuid;
489 	mutex_unlock(&dm_hash_cells_mutex);
490 
491 	__link_uuid(hc);
492 }
493 
494 /*
495  * Changes the name of a hash_cell and returns the old name for
496  * the caller to free.
497  */
498 static char *__change_cell_name(struct hash_cell *hc, char *new_name)
499 {
500 	char *old_name;
501 
502 	/*
503 	 * Rename and move the name cell.
504 	 */
505 	__unlink_name(hc);
506 	old_name = hc->name;
507 
508 	mutex_lock(&dm_hash_cells_mutex);
509 	hc->name = new_name;
510 	mutex_unlock(&dm_hash_cells_mutex);
511 
512 	__link_name(hc);
513 
514 	return old_name;
515 }
516 
517 static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
518 					    const char *new)
519 {
520 	char *new_data, *old_name = NULL;
521 	struct hash_cell *hc;
522 	struct dm_table *table;
523 	struct mapped_device *md;
524 	unsigned int change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
525 	int srcu_idx;
526 	struct dm_ima_context *ima_context = NULL;
527 
528 	/*
529 	 * duplicate new.
530 	 */
531 	new_data = kstrdup(new, GFP_KERNEL);
532 	if (!new_data)
533 		return ERR_PTR(-ENOMEM);
534 
535 	dm_ima_alloc_context(&ima_context, true);
536 	down_write(&_hash_lock);
537 
538 	/*
539 	 * Is new free ?
540 	 */
541 	if (change_uuid)
542 		hc = __get_uuid_cell(new);
543 	else
544 		hc = __get_name_cell(new);
545 
546 	if (hc) {
547 		DMERR("Unable to change %s on mapped device %s to one that already exists: %s",
548 		      change_uuid ? "uuid" : "name",
549 		      param->name, new);
550 		dm_put(hc->md);
551 		up_write(&_hash_lock);
552 		dm_ima_free_context(ima_context);
553 		kfree(new_data);
554 		return ERR_PTR(-EBUSY);
555 	}
556 
557 	/*
558 	 * Is there such a device as 'old' ?
559 	 */
560 	hc = __get_name_cell(param->name);
561 	if (!hc) {
562 		DMERR("Unable to rename non-existent device, %s to %s%s",
563 		      param->name, change_uuid ? "uuid " : "", new);
564 		up_write(&_hash_lock);
565 		dm_ima_free_context(ima_context);
566 		kfree(new_data);
567 		return ERR_PTR(-ENXIO);
568 	}
569 
570 	/*
571 	 * Does this device already have a uuid?
572 	 */
573 	if (change_uuid && hc->uuid) {
574 		DMERR("Unable to change uuid of mapped device %s to %s "
575 		      "because uuid is already set to %s",
576 		      param->name, new, hc->uuid);
577 		dm_put(hc->md);
578 		up_write(&_hash_lock);
579 		dm_ima_free_context(ima_context);
580 		kfree(new_data);
581 		return ERR_PTR(-EINVAL);
582 	}
583 
584 	if (change_uuid)
585 		__set_cell_uuid(hc, new_data);
586 	else
587 		old_name = __change_cell_name(hc, new_data);
588 
589 	/*
590 	 * Wake up any dm event waiters.
591 	 */
592 	table = dm_get_live_table(hc->md, &srcu_idx);
593 	if (table)
594 		dm_table_event(table);
595 	dm_put_live_table(hc->md, srcu_idx);
596 
597 	if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr, false))
598 		param->flags |= DM_UEVENT_GENERATED_FLAG;
599 
600 	md = hc->md;
601 
602 	dm_ima_init_context(hc, ima_context, false);
603 
604 	up_write(&_hash_lock);
605 	dm_ima_measure_on_device_rename(md, ima_context);
606 	dm_ima_free_context(ima_context);
607 	kfree(old_name);
608 
609 	return md;
610 }
611 
612 void dm_deferred_remove(void)
613 {
614 	dm_hash_remove_all(DM_REMOVE_KEEP_OPEN_DEVICES | DM_REMOVE_ONLY_DEFERRED);
615 }
616 
617 /*
618  *---------------------------------------------------------------
619  * Implementation of the ioctl commands
620  *---------------------------------------------------------------
621  */
622 /*
623  * All the ioctl commands get dispatched to functions with this
624  * prototype.
625  */
626 typedef int (*ioctl_fn)(struct file *filp, struct dm_ioctl *param, size_t param_size);
627 
628 static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_size)
629 {
630 	int r;
631 	int flags = DM_REMOVE_KEEP_OPEN_DEVICES | DM_REMOVE_INTERRUPTIBLE;
632 	if (param->flags & DM_DEFERRED_REMOVE)
633 		flags |= DM_REMOVE_MARK_DEFERRED;
634 	r = dm_hash_remove_all(flags);
635 	param->data_size = 0;
636 	return r;
637 }
638 
/*
 * Helpers that round a size or a pointer up to the next 8-byte boundary.
 * Values that are already aligned are returned unchanged.
 */
#define ALIGN_MASK 7
static inline size_t align_val(size_t val)
{
	const size_t mask = ALIGN_MASK;

	return (val + mask) & ~mask;
}
static inline void *align_ptr(void *ptr)
{
	size_t addr = (size_t)ptr;

	return (void *)align_val(addr);
}
651 
652 /*
653  * Retrieves the data payload buffer from an already allocated
654  * struct dm_ioctl.
655  */
656 static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
657 			       size_t *len)
658 {
659 	param->data_start = align_ptr(param + 1) - (void *) param;
660 
661 	if (param->data_start < param_size)
662 		*len = param_size - param->data_start;
663 	else
664 		*len = 0;
665 
666 	return ((void *) param) + param->data_start;
667 }
668 
669 static bool filter_device(struct hash_cell *hc, const char *pfx_name, const char *pfx_uuid)
670 {
671 	const char *val;
672 	size_t val_len, pfx_len;
673 
674 	val = hc->name;
675 	val_len = strlen(val);
676 	pfx_len = strnlen(pfx_name, DM_NAME_LEN);
677 	if (pfx_len > val_len)
678 		return false;
679 	if (memcmp(val, pfx_name, pfx_len))
680 		return false;
681 
682 	val = hc->uuid ? hc->uuid : "";
683 	val_len = strlen(val);
684 	pfx_len = strnlen(pfx_uuid, DM_UUID_LEN);
685 	if (pfx_len > val_len)
686 		return false;
687 	if (memcmp(val, pfx_uuid, pfx_len))
688 		return false;
689 
690 	return true;
691 }
692 
693 static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
694 {
695 	struct rb_node *n;
696 	struct hash_cell *hc;
697 	size_t len, needed = 0;
698 	struct gendisk *disk;
699 	struct dm_name_list *orig_nl, *nl, *old_nl = NULL;
700 	uint32_t *event_nr;
701 
702 	down_write(&_hash_lock);
703 
704 	/*
705 	 * Loop through all the devices working out how much
706 	 * space we need.
707 	 */
708 	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
709 		hc = container_of(n, struct hash_cell, name_node);
710 		if (!filter_device(hc, param->name, param->uuid))
711 			continue;
712 		needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
713 		needed += align_val(sizeof(uint32_t) * 2);
714 		if (param->flags & DM_UUID_FLAG && hc->uuid)
715 			needed += align_val(strlen(hc->uuid) + 1);
716 	}
717 
718 	/*
719 	 * Grab our output buffer.
720 	 */
721 	nl = orig_nl = get_result_buffer(param, param_size, &len);
722 	if (len < needed || len < sizeof(nl->dev)) {
723 		param->flags |= DM_BUFFER_FULL_FLAG;
724 		goto out;
725 	}
726 	param->data_size = param->data_start + needed;
727 
728 	nl->dev = 0;	/* Flags no data */
729 
730 	/*
731 	 * Now loop through filling out the names.
732 	 */
733 	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
734 		void *uuid_ptr;
735 
736 		hc = container_of(n, struct hash_cell, name_node);
737 		if (!filter_device(hc, param->name, param->uuid))
738 			continue;
739 		if (old_nl)
740 			old_nl->next = (uint32_t) ((void *) nl -
741 						   (void *) old_nl);
742 		disk = dm_disk(hc->md);
743 		nl->dev = huge_encode_dev(disk_devt(disk));
744 		nl->next = 0;
745 		strcpy(nl->name, hc->name);
746 
747 		old_nl = nl;
748 		event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
749 		event_nr[0] = dm_get_event_nr(hc->md);
750 		event_nr[1] = 0;
751 		uuid_ptr = align_ptr(event_nr + 2);
752 		if (param->flags & DM_UUID_FLAG) {
753 			if (hc->uuid) {
754 				event_nr[1] |= DM_NAME_LIST_FLAG_HAS_UUID;
755 				strcpy(uuid_ptr, hc->uuid);
756 				uuid_ptr = align_ptr(uuid_ptr + strlen(hc->uuid) + 1);
757 			} else {
758 				event_nr[1] |= DM_NAME_LIST_FLAG_DOESNT_HAVE_UUID;
759 			}
760 		}
761 		nl = uuid_ptr;
762 	}
763 	/*
764 	 * If mismatch happens, security may be compromised due to buffer
765 	 * overflow, so it's better to crash.
766 	 */
767 	BUG_ON((char *)nl - (char *)orig_nl != needed);
768 
769  out:
770 	up_write(&_hash_lock);
771 	return 0;
772 }
773 
774 static void list_version_get_needed(struct target_type *tt, void *needed_param)
775 {
776 	size_t *needed = needed_param;
777 
778 	*needed += sizeof(struct dm_target_versions);
779 	*needed += strlen(tt->name) + 1;
780 	*needed += ALIGN_MASK;
781 }
782 
783 static void list_version_get_info(struct target_type *tt, void *param)
784 {
785 	struct vers_iter *info = param;
786 
787 	/* Check space - it might have changed since the first iteration */
788 	if ((char *)info->vers + sizeof(tt->version) + strlen(tt->name) + 1 > info->end) {
789 		info->flags = DM_BUFFER_FULL_FLAG;
790 		return;
791 	}
792 
793 	if (info->old_vers)
794 		info->old_vers->next = (uint32_t) ((void *)info->vers - (void *)info->old_vers);
795 
796 	info->vers->version[0] = tt->version[0];
797 	info->vers->version[1] = tt->version[1];
798 	info->vers->version[2] = tt->version[2];
799 	info->vers->next = 0;
800 	strcpy(info->vers->name, tt->name);
801 
802 	info->old_vers = info->vers;
803 	info->vers = align_ptr((void *)(info->vers + 1) + strlen(tt->name) + 1);
804 }
805 
806 static int __list_versions(struct dm_ioctl *param, size_t param_size, const char *name)
807 {
808 	size_t len, needed = 0;
809 	struct dm_target_versions *vers;
810 	struct vers_iter iter_info;
811 	struct target_type *tt = NULL;
812 
813 	if (name) {
814 		tt = dm_get_target_type(name);
815 		if (!tt)
816 			return -EINVAL;
817 	}
818 
819 	/*
820 	 * Loop through all the devices working out how much
821 	 * space we need.
822 	 */
823 	if (!tt)
824 		dm_target_iterate(list_version_get_needed, &needed);
825 	else
826 		list_version_get_needed(tt, &needed);
827 
828 	/*
829 	 * Grab our output buffer.
830 	 */
831 	vers = get_result_buffer(param, param_size, &len);
832 	if (len < needed) {
833 		param->flags |= DM_BUFFER_FULL_FLAG;
834 		goto out;
835 	}
836 	param->data_size = param->data_start + needed;
837 
838 	iter_info.param_size = param_size;
839 	iter_info.old_vers = NULL;
840 	iter_info.vers = vers;
841 	iter_info.flags = 0;
842 	iter_info.end = (char *)vers + needed;
843 
844 	/*
845 	 * Now loop through filling out the names & versions.
846 	 */
847 	if (!tt)
848 		dm_target_iterate(list_version_get_info, &iter_info);
849 	else
850 		list_version_get_info(tt, &iter_info);
851 	param->flags |= iter_info.flags;
852 
853  out:
854 	if (tt)
855 		dm_put_target_type(tt);
856 	return 0;
857 }
858 
859 static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
860 {
861 	return __list_versions(param, param_size, NULL);
862 }
863 
864 static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t param_size)
865 {
866 	return __list_versions(param, param_size, param->name);
867 }
868 
869 static int check_name(const char *name)
870 {
871 	if (strchr(name, '/')) {
872 		DMERR("device name cannot contain '/'");
873 		return -EINVAL;
874 	}
875 
876 	if (strcmp(name, DM_CONTROL_NODE) == 0 ||
877 	    strcmp(name, ".") == 0 ||
878 	    strcmp(name, "..") == 0) {
879 		DMERR("device name cannot be \"%s\", \".\", or \"..\"", DM_CONTROL_NODE);
880 		return -EINVAL;
881 	}
882 
883 	return 0;
884 }
885 
886 /*
887  * On successful return, the caller must not attempt to acquire
888  * _hash_lock without first calling dm_put_live_table, because dm_table_destroy
889  * waits for this dm_put_live_table and could be called under this lock.
890  */
891 static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx)
892 {
893 	struct hash_cell *hc;
894 	struct dm_table *table = NULL;
895 
896 	/* increment rcu count, we don't care about the table pointer */
897 	dm_get_live_table(md, srcu_idx);
898 
899 	down_read(&_hash_lock);
900 	hc = dm_get_mdptr(md);
901 	if (!hc) {
902 		DMERR("device has been removed from the dev hash table.");
903 		goto out;
904 	}
905 
906 	table = hc->new_map;
907 
908 out:
909 	up_read(&_hash_lock);
910 
911 	return table;
912 }
913 
914 static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md,
915 						      struct dm_ioctl *param,
916 						      int *srcu_idx)
917 {
918 	return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ?
919 		dm_get_inactive_table(md, srcu_idx) : dm_get_live_table(md, srcu_idx);
920 }
921 
922 /*
923  * Fills in a dm_ioctl structure, ready for sending back to
924  * userland.
925  */
926 static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
927 {
928 	struct gendisk *disk = dm_disk(md);
929 	struct dm_table *table;
930 	int srcu_idx;
931 
932 	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
933 			  DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
934 
935 	if (dm_suspended_md(md))
936 		param->flags |= DM_SUSPEND_FLAG;
937 
938 	if (dm_suspended_internally_md(md))
939 		param->flags |= DM_INTERNAL_SUSPEND_FLAG;
940 
941 	if (dm_test_deferred_remove_flag(md))
942 		param->flags |= DM_DEFERRED_REMOVE;
943 
944 	param->dev = huge_encode_dev(disk_devt(disk));
945 
946 	/*
947 	 * Yes, this will be out of date by the time it gets back
948 	 * to userland, but it is still very useful for
949 	 * debugging.
950 	 */
951 	param->open_count = dm_open_count(md);
952 
953 	param->event_nr = dm_get_event_nr(md);
954 	param->target_count = 0;
955 
956 	table = dm_get_live_table(md, &srcu_idx);
957 	if (table) {
958 		if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
959 			if (get_disk_ro(disk))
960 				param->flags |= DM_READONLY_FLAG;
961 			param->target_count = table->num_targets;
962 		}
963 
964 		param->flags |= DM_ACTIVE_PRESENT_FLAG;
965 	}
966 	dm_put_live_table(md, srcu_idx);
967 
968 	if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) {
969 		int srcu_idx;
970 
971 		table = dm_get_inactive_table(md, &srcu_idx);
972 		if (table) {
973 			if (!(dm_table_get_mode(table) & BLK_OPEN_WRITE))
974 				param->flags |= DM_READONLY_FLAG;
975 			param->target_count = table->num_targets;
976 		}
977 		dm_put_live_table(md, srcu_idx);
978 	}
979 }
980 
981 static int dev_create(struct file *filp, struct dm_ioctl *param, size_t param_size)
982 {
983 	int r, m = DM_ANY_MINOR;
984 	struct mapped_device *md;
985 
986 	r = check_name(param->name);
987 	if (r)
988 		return r;
989 
990 	if (param->flags & DM_PERSISTENT_DEV_FLAG)
991 		m = MINOR(huge_decode_dev(param->dev));
992 
993 	r = dm_create(m, &md);
994 	if (r)
995 		return r;
996 
997 	r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
998 	if (r) {
999 		dm_put(md);
1000 		dm_destroy(md);
1001 		return r;
1002 	}
1003 
1004 	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
1005 
1006 	__dev_status(md, param);
1007 
1008 	dm_put(md);
1009 
1010 	return 0;
1011 }
1012 
1013 /*
1014  * Always use UUID for lookups if it's present, otherwise use name or dev.
1015  */
1016 static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
1017 {
1018 	struct hash_cell *hc = NULL;
1019 
1020 	if (*param->uuid) {
1021 		if (*param->name || param->dev) {
1022 			DMERR("Invalid ioctl structure: uuid %s, name %s, dev %llx",
1023 			      param->uuid, param->name, (unsigned long long)param->dev);
1024 			return NULL;
1025 		}
1026 
1027 		hc = __get_uuid_cell(param->uuid);
1028 		if (!hc)
1029 			return NULL;
1030 	} else if (*param->name) {
1031 		if (param->dev) {
1032 			DMERR("Invalid ioctl structure: name %s, dev %llx",
1033 			      param->name, (unsigned long long)param->dev);
1034 			return NULL;
1035 		}
1036 
1037 		hc = __get_name_cell(param->name);
1038 		if (!hc)
1039 			return NULL;
1040 	} else if (param->dev) {
1041 		hc = __get_dev_cell(param->dev);
1042 		if (!hc)
1043 			return NULL;
1044 	} else
1045 		return NULL;
1046 
1047 	/*
1048 	 * Sneakily write in both the name and the uuid
1049 	 * while we have the cell.
1050 	 */
1051 	strscpy(param->name, hc->name, sizeof(param->name));
1052 	if (hc->uuid)
1053 		strscpy(param->uuid, hc->uuid, sizeof(param->uuid));
1054 	else
1055 		param->uuid[0] = '\0';
1056 
1057 	if (hc->new_map)
1058 		param->flags |= DM_INACTIVE_PRESENT_FLAG;
1059 	else
1060 		param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
1061 
1062 	return hc;
1063 }
1064 
1065 static struct mapped_device *find_device(struct dm_ioctl *param)
1066 {
1067 	struct hash_cell *hc;
1068 	struct mapped_device *md = NULL;
1069 
1070 	down_read(&_hash_lock);
1071 	hc = __find_device_hash_cell(param);
1072 	if (hc)
1073 		md = hc->md;
1074 	up_read(&_hash_lock);
1075 
1076 	return md;
1077 }
1078 
1079 static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_size)
1080 {
1081 	struct hash_cell *hc;
1082 	struct mapped_device *md;
1083 	int r;
1084 	struct dm_table *t;
1085 	struct dm_ima_context *ima_context = NULL;
1086 	unsigned int ima_idx;
1087 
1088 	dm_ima_alloc_context(&ima_context, true);
1089 	down_write(&_hash_lock);
1090 	hc = __find_device_hash_cell(param);
1091 
1092 	if (!hc) {
1093 		DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table.");
1094 		up_write(&_hash_lock);
1095 		dm_ima_free_context(ima_context);
1096 		return -ENXIO;
1097 	}
1098 
1099 	md = hc->md;
1100 
1101 	/*
1102 	 * Ensure the device is not open and nothing further can open it.
1103 	 */
1104 	r = dm_lock_for_deletion(md, !!(param->flags & DM_DEFERRED_REMOVE), false);
1105 	if (r) {
1106 		if (r == -EBUSY && param->flags & DM_DEFERRED_REMOVE) {
1107 			up_write(&_hash_lock);
1108 			dm_put(md);
1109 			dm_ima_free_context(ima_context);
1110 			return 0;
1111 		}
1112 		DMDEBUG_LIMIT("unable to remove open device %s", hc->name);
1113 		up_write(&_hash_lock);
1114 		dm_put(md);
1115 		dm_ima_free_context(ima_context);
1116 		return r;
1117 	}
1118 
1119 	ima_idx = dm_ima_init_context(hc, ima_context, true);
1120 	t = __hash_remove(hc);
1121 	up_write(&_hash_lock);
1122 
1123 	if (t) {
1124 		dm_sync_table(md);
1125 		dm_table_destroy(t);
1126 	}
1127 
1128 	param->flags &= ~DM_DEFERRED_REMOVE;
1129 
1130 	dm_ima_measure_on_device_remove(md, false, ima_context, ima_idx);
1131 	dm_ima_free_context(ima_context);
1132 
1133 	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr, false))
1134 		param->flags |= DM_UEVENT_GENERATED_FLAG;
1135 
1136 	dm_put(md);
1137 	dm_destroy(md);
1138 	return 0;
1139 }
1140 
/*
 * Check that the string starting at @str is NUL-terminated before @end,
 * i.e. that it doesn't overrun the chunk of memory we copied from
 * userland.
 *
 * Returns 0 when a terminator is found in [str, end), -EINVAL otherwise
 * (including when @str is not below @end).
 */
static int invalid_str(char *str, void *end)
{
	char *p;

	for (p = str; (void *) p < end; p++) {
		if (*p == '\0')
			return 0;
	}

	return -EINVAL;
}
1153 
1154 static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_size)
1155 {
1156 	int r;
1157 	char *new_data = (char *) param + param->data_start;
1158 	struct mapped_device *md;
1159 	unsigned int change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
1160 
1161 	if (new_data < param->data ||
1162 	    invalid_str(new_data, (void *) param + param_size) || !*new_data ||
1163 	    strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) {
1164 		DMERR("Invalid new mapped device name or uuid string supplied.");
1165 		return -EINVAL;
1166 	}
1167 
1168 	if (!change_uuid) {
1169 		r = check_name(new_data);
1170 		if (r)
1171 			return r;
1172 	}
1173 
1174 	md = dm_hash_rename(param, new_data);
1175 	if (IS_ERR(md))
1176 		return PTR_ERR(md);
1177 
1178 	__dev_status(md, param);
1179 	dm_put(md);
1180 
1181 	return 0;
1182 }
1183 
1184 static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t param_size)
1185 {
1186 	int r = -EINVAL, x;
1187 	struct mapped_device *md;
1188 	struct hd_geometry geometry;
1189 	unsigned long indata[4];
1190 	char *geostr = (char *) param + param->data_start;
1191 	char dummy;
1192 
1193 	md = find_device(param);
1194 	if (!md)
1195 		return -ENXIO;
1196 
1197 	if (geostr < param->data ||
1198 	    invalid_str(geostr, (void *) param + param_size)) {
1199 		DMERR("Invalid geometry supplied.");
1200 		goto out;
1201 	}
1202 
1203 	x = sscanf(geostr, "%lu %lu %lu %lu%c", indata,
1204 		   indata + 1, indata + 2, indata + 3, &dummy);
1205 
1206 	if (x != 4) {
1207 		DMERR("Unable to interpret geometry settings.");
1208 		goto out;
1209 	}
1210 
1211 	if (indata[0] > 65535 || indata[1] > 255 || indata[2] > 255) {
1212 		DMERR("Geometry exceeds range limits.");
1213 		goto out;
1214 	}
1215 
1216 	geometry.cylinders = indata[0];
1217 	geometry.heads = indata[1];
1218 	geometry.sectors = indata[2];
1219 	geometry.start = indata[3];
1220 
1221 	r = dm_set_geometry(md, &geometry);
1222 
1223 	param->data_size = 0;
1224 
1225 out:
1226 	dm_put(md);
1227 	return r;
1228 }
1229 
1230 static int do_suspend(struct dm_ioctl *param)
1231 {
1232 	int r = 0;
1233 	unsigned int suspend_flags = DM_SUSPEND_LOCKFS_FLAG;
1234 	struct mapped_device *md;
1235 
1236 	md = find_device(param);
1237 	if (!md)
1238 		return -ENXIO;
1239 
1240 	if (param->flags & DM_SKIP_LOCKFS_FLAG)
1241 		suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
1242 	if (param->flags & DM_NOFLUSH_FLAG)
1243 		suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
1244 
1245 	if (!dm_suspended_md(md)) {
1246 		r = dm_suspend(md, suspend_flags);
1247 		if (r)
1248 			goto out;
1249 	}
1250 
1251 	__dev_status(md, param);
1252 
1253 out:
1254 	dm_put(md);
1255 
1256 	return r;
1257 }
1258 
/*
 * Resume a device, first swapping in the staged (inactive) table if one
 * has been loaded.
 *
 * The staged table is detached from the hash cell under _hash_lock.  If
 * one was present, the device is suspended (unless it already is), the
 * table is swapped in with dm_swap_table() and the disk's read-only
 * state is set from the new table's mode.  The device is then resumed if
 * it is suspended and, on success, a KOBJ_CHANGE uevent is sent and the
 * status is filled into @param.
 *
 * Returns 0 on success; -ENXIO if the device is not in the hash table,
 * or if suspending failed and the staged table could not be put back
 * (the hash cell is gone or already holds another staged table);
 * otherwise the error from dm_suspend(), dm_swap_table() or dm_resume().
 */
static int do_resume(struct dm_ioctl *param)
{
	int r = 0;
	unsigned int suspend_flags = DM_SUSPEND_LOCKFS_FLAG;
	struct hash_cell *hc;
	struct mapped_device *md;
	struct dm_table *new_map, *old_map = NULL;
	bool need_resize_uevent = false;
	struct dm_ima_context *ima_context = NULL;

	dm_ima_alloc_context(&ima_context, true);
	down_write(&_hash_lock);

	hc = __find_device_hash_cell(param);
	if (!hc) {
		DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table.");
		up_write(&_hash_lock);
		dm_ima_free_context(ima_context);
		return -ENXIO;
	}

	md = hc->md;

	/* Take ownership of the staged table while holding the lock. */
	new_map = hc->new_map;
	hc->new_map = NULL;
	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
	if (new_map)
		dm_ima_init_context(hc, ima_context, false);
	up_write(&_hash_lock);

	/* Do we need to load a new map ? */
	if (new_map) {
		sector_t old_size, new_size;

		dm_ima_context_table_op(md, ima_context, DM_IMA_TABLE_SAVE);
		/* Suspend if it isn't already suspended */
		if (param->flags & DM_SKIP_LOCKFS_FLAG)
			suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
		if (param->flags & DM_NOFLUSH_FLAG)
			suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
		if (!dm_suspended_md(md)) {
			r = dm_suspend(md, suspend_flags);
			if (r) {
				/*
				 * Suspend failed: try to hand the table back
				 * to the hash cell as the staged table.  This
				 * is only possible if the cell still exists
				 * and no other table was staged meanwhile.
				 */
				down_write(&_hash_lock);
				hc = dm_get_mdptr(md);
				if (hc && !hc->new_map) {
					hc->new_map = new_map;
					new_map = NULL;
					dm_ima_init_context(hc, ima_context,
							    false);
				} else {
					r = -ENXIO;
				}
				up_write(&_hash_lock);
				/* new_map is still set only if it was not handed back. */
				if (new_map) {
					dm_sync_table(md);
					dm_table_destroy(new_map);
				} else
					dm_ima_context_table_op(md, ima_context, DM_IMA_TABLE_RESTORE);
				dm_ima_free_context(ima_context);
				dm_put(md);
				return r;
			}
		}

		/* Sizes before and after the swap decide whether a resize uevent is needed. */
		old_size = dm_get_size(md);
		old_map = dm_swap_table(md, new_map);
		if (IS_ERR(old_map)) {
			dm_sync_table(md);
			dm_table_destroy(new_map);
			dm_ima_free_context(ima_context);
			dm_put(md);
			return PTR_ERR(old_map);
		}
		if (dm_ima_need_measure(md, new_map, ima_context))
			dm_ima_measure_on_device_resume(md, true, ima_context);
		new_size = dm_get_size(md);
		if (old_size && new_size && old_size != new_size)
			need_resize_uevent = true;

		/* The disk is writable only if the new table was opened for writing. */
		if (dm_table_get_mode(new_map) & BLK_OPEN_WRITE)
			set_disk_ro(dm_disk(md), 0);
		else
			set_disk_ro(dm_disk(md), 1);
	}

	if (dm_suspended_md(md)) {
		r = dm_resume(md);
		if (!r) {
			if (!new_map && dm_ima_need_measure(md, NULL,
							    ima_context))
				dm_ima_measure_on_device_resume(md, false,
								ima_context);

			if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr, need_resize_uevent))
				param->flags |= DM_UEVENT_GENERATED_FLAG;
		}
	}

	/*
	 * Since dm_swap_table synchronizes RCU, nobody should be in
	 * read-side critical section already.
	 */
	if (old_map)
		dm_table_destroy(old_map);

	if (!r)
		__dev_status(md, param);

	dm_ima_free_context(ima_context);
	dm_put(md);
	return r;
}
1372 
1373 /*
1374  * Set or unset the suspension state of a device.
1375  * If the device already is in the requested state we just return its status.
1376  */
1377 static int dev_suspend(struct file *filp, struct dm_ioctl *param, size_t param_size)
1378 {
1379 	if (param->flags & DM_SUSPEND_FLAG)
1380 		return do_suspend(param);
1381 
1382 	return do_resume(param);
1383 }
1384 
1385 /*
1386  * Copies device info back to user space, used by
1387  * the create and info ioctls.
1388  */
1389 static int dev_status(struct file *filp, struct dm_ioctl *param, size_t param_size)
1390 {
1391 	struct mapped_device *md;
1392 
1393 	md = find_device(param);
1394 	if (!md)
1395 		return -ENXIO;
1396 
1397 	__dev_status(md, param);
1398 	dm_put(md);
1399 
1400 	return 0;
1401 }
1402 
/*
 * Build up the status struct for each target
 *
 * For every target in @table a struct dm_target_spec is written into the
 * result buffer of @param, followed by the NUL-terminated string produced
 * by the target's ->status() method (or an empty string if the target has
 * none).  The kind of string requested depends on @param->flags:
 * DM_STATUS_TABLE_FLAG selects STATUSTYPE_TABLE, DM_IMA_MEASUREMENT_FLAG
 * selects STATUSTYPE_IMA, otherwise STATUSTYPE_INFO is used.
 *
 * If the buffer runs out, DM_BUFFER_FULL_FLAG is set and the loop stops.
 * @param->data_size is updated only if at least one target was written
 * completely; @param->target_count is always set to the number of targets
 * in the table.
 */
static void retrieve_status(struct dm_table *table,
			    struct dm_ioctl *param, size_t param_size)
{
	unsigned int i, num_targets;
	struct dm_target_spec *spec;
	char *outbuf, *outptr;
	status_type_t type;
	size_t remaining, len, used = 0;
	unsigned int status_flags = 0;

	outptr = outbuf = get_result_buffer(param, param_size, &len);

	if (param->flags & DM_STATUS_TABLE_FLAG)
		type = STATUSTYPE_TABLE;
	else if (param->flags & DM_IMA_MEASUREMENT_FLAG)
		type = STATUSTYPE_IMA;
	else
		type = STATUSTYPE_INFO;

	/* Get all the target info */
	num_targets = table->num_targets;
	for (i = 0; i < num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(table, i);
		size_t l;

		/* Need room for the spec plus at least one byte of string. */
		remaining = len - (outptr - outbuf);
		if (remaining <= sizeof(struct dm_target_spec)) {
			param->flags |= DM_BUFFER_FULL_FLAG;
			break;
		}

		spec = (struct dm_target_spec *) outptr;

		spec->status = 0;
		spec->sector_start = ti->begin;
		spec->length = ti->len;
		strscpy_pad(spec->target_type, ti->type->name,
			sizeof(spec->target_type));

		outptr += sizeof(struct dm_target_spec);
		remaining = len - (outptr - outbuf);
		/*
		 * NOTE(review): remaining is a size_t, so this is an "== 0"
		 * test, and the check above already guarantees it is non-zero
		 * here; it looks purely defensive.
		 */
		if (remaining <= 0) {
			param->flags |= DM_BUFFER_FULL_FLAG;
			break;
		}

		/* Get the status/table string from the target driver */
		if (ti->type->status) {
			if (param->flags & DM_NOFLUSH_FLAG)
				status_flags |= DM_STATUS_NOFLUSH_FLAG;
			ti->type->status(ti, type, status_flags, outptr, remaining);
		} else
			outptr[0] = '\0';

		/* A string that exactly fills the remaining space is treated as truncated. */
		l = strlen(outptr) + 1;
		if (l == remaining) {
			param->flags |= DM_BUFFER_FULL_FLAG;
			break;
		}

		outptr += l;
		/* Record the size up to the end of this target's string, before alignment padding. */
		used = param->data_start + (outptr - outbuf);

		outptr = align_ptr(outptr);
		if (!outptr || outptr > outbuf + len) {
			param->flags |= DM_BUFFER_FULL_FLAG;
			break;
		}
		/* Offset of the next spec, measured from the start of the result buffer. */
		spec->next = outptr - outbuf;
	}

	if (used)
		param->data_size = used;

	param->target_count = num_targets;
}
1482 
1483 /*
1484  * Wait for a device to report an event
1485  */
1486 static int dev_wait(struct file *filp, struct dm_ioctl *param, size_t param_size)
1487 {
1488 	int r = 0;
1489 	struct mapped_device *md;
1490 	struct dm_table *table;
1491 	int srcu_idx;
1492 
1493 	md = find_device(param);
1494 	if (!md)
1495 		return -ENXIO;
1496 
1497 	/*
1498 	 * Wait for a notification event
1499 	 */
1500 	if (dm_wait_event(md, param->event_nr)) {
1501 		r = -ERESTARTSYS;
1502 		goto out;
1503 	}
1504 
1505 	/*
1506 	 * The userland program is going to want to know what
1507 	 * changed to trigger the event, so we may as well tell
1508 	 * him and save an ioctl.
1509 	 */
1510 	__dev_status(md, param);
1511 
1512 	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
1513 	if (table)
1514 		retrieve_status(table, param, param_size);
1515 	dm_put_live_table(md, srcu_idx);
1516 
1517 out:
1518 	dm_put(md);
1519 
1520 	return r;
1521 }
1522 
1523 /*
1524  * Remember the global event number and make it possible to poll
1525  * for further events.
1526  */
1527 static int dev_arm_poll(struct file *filp, struct dm_ioctl *param, size_t param_size)
1528 {
1529 	struct dm_file *priv = filp->private_data;
1530 
1531 	priv->global_event_nr = atomic_read(&dm_global_event_nr);
1532 
1533 	return 0;
1534 }
1535 
1536 static inline blk_mode_t get_mode(struct dm_ioctl *param)
1537 {
1538 	blk_mode_t mode = BLK_OPEN_READ | BLK_OPEN_WRITE;
1539 
1540 	if (param->flags & DM_READONLY_FLAG)
1541 		mode = BLK_OPEN_READ;
1542 
1543 	return mode;
1544 }
1545 
1546 static int next_target(struct dm_target_spec *last, uint32_t next, const char *end,
1547 		       struct dm_target_spec **spec, char **target_params)
1548 {
1549 	static_assert(__alignof__(struct dm_target_spec) <= 8,
1550 		"struct dm_target_spec must not require more than 8-byte alignment");
1551 
1552 	/*
1553 	 * Number of bytes remaining, starting with last. This is always
1554 	 * sizeof(struct dm_target_spec) or more, as otherwise *last was
1555 	 * out of bounds already.
1556 	 */
1557 	size_t remaining = end - (char *)last;
1558 
1559 	/*
1560 	 * There must be room for both the next target spec and the
1561 	 * NUL-terminator of the target itself.
1562 	 */
1563 	if (remaining - sizeof(struct dm_target_spec) <= next) {
1564 		DMERR("Target spec extends beyond end of parameters");
1565 		return -EINVAL;
1566 	}
1567 
1568 	if (next % __alignof__(struct dm_target_spec)) {
1569 		DMERR("Next dm_target_spec (offset %u) is not %zu-byte aligned",
1570 		      next, __alignof__(struct dm_target_spec));
1571 		return -EINVAL;
1572 	}
1573 
1574 	*spec = (struct dm_target_spec *) ((unsigned char *) last + next);
1575 	*target_params = (char *) (*spec + 1);
1576 
1577 	return 0;
1578 }
1579 
/*
 * Add every target described in the ioctl parameter block to @table and
 * complete the table.
 *
 * The first target spec lies @param->data_start bytes from the start of
 * @param; each following spec lies spec->next bytes from the previous
 * spec.  Each offset is checked so that a spec cannot overlap the
 * 'struct dm_ioctl' header or the previous target (including its
 * parameter string), and each parameter string must be NUL-terminated
 * before the end of the block.
 *
 * Returns -EINVAL if no targets are specified or the layout is invalid,
 * the error from next_target() or dm_table_add_target(), otherwise the
 * result of dm_table_complete().
 */
static int populate_table(struct dm_table *table,
			  struct dm_ioctl *param, size_t param_size)
{
	int r;
	unsigned int i = 0;
	/* Offsets are relative to the previous spec; the first is relative to param. */
	struct dm_target_spec *spec = (struct dm_target_spec *) param;
	uint32_t next = param->data_start;
	const char *const end = (const char *) param + param_size;
	char *target_params;
	/* Smallest offset from the current spec that does not overlap earlier data. */
	size_t min_size = sizeof(struct dm_ioctl);

	if (!param->target_count) {
		DMERR("%s: no targets specified", __func__);
		return -EINVAL;
	}

	for (i = 0; i < param->target_count; i++) {
		const char *nul_terminator;

		if (next < min_size) {
			DMERR("%s: next target spec (offset %u) overlaps %s",
			      __func__, next, i ? "previous target" : "'struct dm_ioctl'");
			return -EINVAL;
		}

		r = next_target(spec, next, end, &spec, &target_params);
		if (r) {
			DMERR("unable to find target");
			return r;
		}

		/* The parameter string must terminate inside the block. */
		nul_terminator = memchr(target_params, 0, (size_t)(end - target_params));
		if (nul_terminator == NULL) {
			DMERR("%s: target parameters not NUL-terminated", __func__);
			return -EINVAL;
		}

		/* Add 1 for NUL terminator */
		min_size = (size_t)(nul_terminator - (const char *)spec) + 1;

		r = dm_table_add_target(table, spec->target_type,
					(sector_t) spec->sector_start,
					(sector_t) spec->length,
					target_params);
		if (r) {
			DMERR("error adding target to table");
			return r;
		}

		next = spec->next;
	}

	return dm_table_complete(table);
}
1634 
1635 static bool is_valid_type(enum dm_queue_mode cur, enum dm_queue_mode new)
1636 {
1637 	if (cur == new ||
1638 	    (cur == DM_TYPE_BIO_BASED && new == DM_TYPE_DAX_BIO_BASED))
1639 		return true;
1640 
1641 	return false;
1642 }
1643 
/*
 * Build a new table from the ioctl parameters and stage it as the
 * device's inactive table (hc->new_map), replacing and destroying any
 * previously staged table.
 *
 * The table is populated and checked against the device's immutable
 * target type and queue type while the md type lock is held.  On the
 * first load (DM_TYPE_NONE) the device queue is set up from the table.
 *
 * Returns 0 on success; -ENXIO if the device is not found or has been
 * removed from the hash table; -EINVAL if the table would replace an
 * immutable target type or change the device type; otherwise the error
 * from dm_table_create(), populate_table() or dm_setup_md_queue().
 */
static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
	int r, srcu_idx;
	struct hash_cell *hc;
	struct dm_table *t, *old_map = NULL;
	struct mapped_device *md;
	struct target_type *immutable_target_type;
	struct dm_ima_context *ima_context = NULL;

	md = find_device(param);
	if (!md)
		return -ENXIO;

	r = dm_table_create(&t, get_mode(param), param->target_count, md);
	if (r)
		goto err;

	/* Protect md->type and md->queue against concurrent table loads. */
	dm_lock_md_type(md);
	r = populate_table(t, param, param_size);
	if (r)
		goto err_unlock_md_type;

	/* A differing immutable target type is only accepted with a wildcard target. */
	immutable_target_type = dm_get_immutable_target_type(md);
	if (immutable_target_type &&
	    (immutable_target_type != dm_table_get_immutable_target_type(t)) &&
	    !dm_table_get_wildcard_target(t)) {
		DMERR("can't replace immutable target type %s",
		      immutable_target_type->name);
		r = -EINVAL;
		goto err_unlock_md_type;
	}

	if (dm_get_md_type(md) == DM_TYPE_NONE) {
		/* setup md->queue to reflect md's type (may block) */
		r = dm_setup_md_queue(md, t);
		if (r) {
			DMERR("unable to set up device queue for new table.");
			goto err_unlock_md_type;
		}
	} else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) {
		DMERR("can't change device type (old=%u vs new=%u) after initial table load.",
		      dm_get_md_type(md), dm_table_get_type(t));
		r = -EINVAL;
		goto err_unlock_md_type;
	}

	dm_unlock_md_type(md);

	dm_ima_alloc_context(&ima_context, false);
	/* stage inactive table */
	down_write(&_hash_lock);
	hc = dm_get_mdptr(md);
	if (!hc) {
		DMERR("device has been removed from the dev hash table.");
		up_write(&_hash_lock);
		dm_ima_free_context(ima_context);
		r = -ENXIO;
		/* The md type lock was already dropped above. */
		goto err_destroy_table;
	}

	/* Remember a previously staged table so it can be destroyed after unlocking. */
	if (hc->new_map)
		old_map = hc->new_map;
	hc->new_map = t;
	dm_ima_init_context(hc, ima_context, false);
	/* Make sure new_map doesn't get freed before we measure it */
	dm_get_live_table(md, &srcu_idx);
	up_write(&_hash_lock);

	dm_ima_measure_on_table_load(t, ima_context);
	dm_ima_free_context(ima_context);
	dm_put_live_table(md, srcu_idx);

	param->flags |= DM_INACTIVE_PRESENT_FLAG;
	__dev_status(md, param);

	if (old_map) {
		dm_sync_table(md);
		dm_table_destroy(old_map);
	}

	dm_put(md);

	return 0;

err_unlock_md_type:
	dm_unlock_md_type(md);
err_destroy_table:
	dm_table_destroy(t);
err:
	dm_put(md);

	return r;
}
1738 
1739 static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_size)
1740 {
1741 	struct hash_cell *hc;
1742 	struct mapped_device *md;
1743 	struct dm_table *old_map = NULL;
1744 	struct dm_ima_context *ima_context = NULL;
1745 
1746 	dm_ima_alloc_context(&ima_context, true);
1747 	down_write(&_hash_lock);
1748 
1749 	hc = __find_device_hash_cell(param);
1750 	if (!hc) {
1751 		DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table.");
1752 		up_write(&_hash_lock);
1753 		dm_ima_free_context(ima_context);
1754 		return -ENXIO;
1755 	}
1756 
1757 	if (hc->new_map) {
1758 		old_map = hc->new_map;
1759 		hc->new_map = NULL;
1760 	}
1761 
1762 	dm_ima_init_context(hc, ima_context, false);
1763 	md = hc->md;
1764 	up_write(&_hash_lock);
1765 	dm_ima_measure_on_table_clear(md, ima_context);
1766 	dm_ima_free_context(ima_context);
1767 
1768 	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
1769 	__dev_status(md, param);
1770 
1771 	if (old_map) {
1772 		dm_sync_table(md);
1773 		dm_table_destroy(old_map);
1774 	}
1775 	dm_put(md);
1776 
1777 	return 0;
1778 }
1779 
1780 /*
1781  * Retrieves a list of devices used by a particular dm device.
1782  */
1783 static void retrieve_deps(struct dm_table *table,
1784 			  struct dm_ioctl *param, size_t param_size)
1785 {
1786 	unsigned int count = 0;
1787 	struct list_head *tmp;
1788 	size_t len, needed;
1789 	struct dm_dev_internal *dd;
1790 	struct dm_target_deps *deps;
1791 
1792 	deps = get_result_buffer(param, param_size, &len);
1793 
1794 	/*
1795 	 * Count the devices.
1796 	 */
1797 	list_for_each(tmp, dm_table_get_devices(table))
1798 		count++;
1799 
1800 	/*
1801 	 * Check we have enough space.
1802 	 */
1803 	needed = struct_size(deps, dev, count);
1804 	if (len < needed) {
1805 		param->flags |= DM_BUFFER_FULL_FLAG;
1806 		return;
1807 	}
1808 
1809 	/*
1810 	 * Fill in the devices.
1811 	 */
1812 	deps->count = count;
1813 	count = 0;
1814 	list_for_each_entry(dd, dm_table_get_devices(table), list)
1815 		deps->dev[count++] = huge_encode_dev(dd->dm_dev->bdev->bd_dev);
1816 
1817 	param->data_size = param->data_start + needed;
1818 }
1819 
1820 static int table_deps(struct file *filp, struct dm_ioctl *param, size_t param_size)
1821 {
1822 	struct mapped_device *md;
1823 	struct dm_table *table;
1824 	int srcu_idx;
1825 
1826 	md = find_device(param);
1827 	if (!md)
1828 		return -ENXIO;
1829 
1830 	__dev_status(md, param);
1831 
1832 	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
1833 	if (table)
1834 		retrieve_deps(table, param, param_size);
1835 	dm_put_live_table(md, srcu_idx);
1836 
1837 	dm_put(md);
1838 
1839 	return 0;
1840 }
1841 
1842 /*
1843  * Return the status of a device as a text string for each
1844  * target.
1845  */
1846 static int table_status(struct file *filp, struct dm_ioctl *param, size_t param_size)
1847 {
1848 	struct mapped_device *md;
1849 	struct dm_table *table;
1850 	int srcu_idx;
1851 
1852 	md = find_device(param);
1853 	if (!md)
1854 		return -ENXIO;
1855 
1856 	__dev_status(md, param);
1857 
1858 	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
1859 	if (table)
1860 		retrieve_status(table, param, param_size);
1861 	dm_put_live_table(md, srcu_idx);
1862 
1863 	dm_put(md);
1864 
1865 	return 0;
1866 }
1867 
1868 /*
1869  * Process device-mapper dependent messages.  Messages prefixed with '@'
1870  * are processed by the DM core.  All others are delivered to the target.
1871  * Returns a number <= 1 if message was processed by device mapper.
1872  * Returns 2 if message should be delivered to the target.
1873  */
1874 static int message_for_md(struct mapped_device *md, unsigned int argc, char **argv,
1875 			  char *result, unsigned int maxlen)
1876 {
1877 	int r;
1878 
1879 	if (**argv != '@')
1880 		return 2; /* no '@' prefix, deliver to target */
1881 
1882 	if (!strcasecmp(argv[0], "@cancel_deferred_remove")) {
1883 		if (argc != 1) {
1884 			DMERR("Invalid arguments for @cancel_deferred_remove");
1885 			return -EINVAL;
1886 		}
1887 		return dm_cancel_deferred_remove(md);
1888 	}
1889 
1890 	r = dm_stats_message(md, argc, argv, result, maxlen);
1891 	if (r < 2)
1892 		return r;
1893 
1894 	DMERR("Unsupported message sent to DM core: %s", argv[0]);
1895 	return -EINVAL;
1896 }
1897 
/*
 * Pass a message to the target that's at the supplied device offset.
 *
 * The struct dm_target_msg is read from the data area of @param at offset
 * @param->data_start.  Its message string is split into arguments and
 * first offered to message_for_md(); only if that returns 2 is the
 * message sent to the target covering tmsg->sector in the live table.
 *
 * A handler return value of 1 means a result string was placed in the
 * result buffer: DM_DATA_OUT_FLAG is set and either the data size is
 * updated or, on overflow, DM_BUFFER_FULL_FLAG is set; 0 is returned.
 *
 * Returns 0 on success, -ENXIO if the device is not found or is being
 * deleted, -EINVAL for malformed input, a missing table, a sector
 * outside the device or a target without message support, or the error
 * from dm_split_args() or the message handler.
 */
static int target_message(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
	int r, argc;
	char **argv;
	struct mapped_device *md;
	struct dm_table *table;
	struct dm_target *ti;
	struct dm_target_msg *tmsg = (void *) param + param->data_start;
	size_t maxlen;
	char *result = get_result_buffer(param, param_size, &maxlen);
	int srcu_idx;

	md = find_device(param);
	if (!md)
		return -ENXIO;

	/* The message must start inside the data area and be NUL-terminated within the block. */
	if (tmsg < (struct dm_target_msg *) param->data ||
	    invalid_str(tmsg->message, (void *) param + param_size)) {
		DMERR("Invalid target message parameters.");
		r = -EINVAL;
		goto out;
	}

	r = dm_split_args(&argc, &argv, tmsg->message);
	if (r) {
		DMERR("Failed to split target message parameters");
		goto out;
	}

	if (!argc) {
		DMERR("Empty message received.");
		r = -EINVAL;
		goto out_argv;
	}

	/* A result <= 1 means the DM core consumed (or rejected) the message. */
	r = message_for_md(md, argc, argv, result, maxlen);
	if (r <= 1)
		goto out_argv;

	table = dm_get_live_table(md, &srcu_idx);
	if (!table) {
		DMERR("The device has no table.");
		r = -EINVAL;
		goto out_table;
	}

	if (dm_deleting_md(md)) {
		r = -ENXIO;
		goto out_table;
	}

	ti = dm_table_find_target(table, tmsg->sector);
	if (!ti) {
		DMERR("Target message sector outside device.");
		r = -EINVAL;
	} else if (ti->type->message)
		r = ti->type->message(ti, argc, argv, result, maxlen);
	else {
		DMERR("Target type does not support messages");
		r = -EINVAL;
	}

 out_table:
	dm_put_live_table(md, srcu_idx);
 out_argv:
	kfree(argv);
 out:
	/* Status is reported for any non-error result. */
	if (r >= 0)
		__dev_status(md, param);

	/* r == 1: the handler left a result string in the result buffer. */
	if (r == 1) {
		param->flags |= DM_DATA_OUT_FLAG;
		if (dm_message_test_buffer_overflow(result, maxlen))
			param->flags |= DM_BUFFER_FULL_FLAG;
		else
			param->data_size = param->data_start + strlen(result) + 1;
		r = 0;
	}

	dm_put(md);
	return r;
}
1983 
/*
 * The ioctl parameter block consists of two parts, a dm_ioctl struct
 * followed by a data buffer.  This flag is set if the second part,
 * which has a variable size, is not used by the function processing
 * the ioctl.
 */
#define IOCTL_FLAGS_NO_PARAMS		1
/*
 * This flag is set for ioctls after whose successful completion
 * ctl_ioctl() calls dm_issue_global_event().
 */
#define IOCTL_FLAGS_ISSUE_GLOBAL_EVENT	2
1992 
1993 /*
1994  *---------------------------------------------------------------
1995  * Implementation of open/close/ioctl on the special char device.
1996  *---------------------------------------------------------------
1997  */
/*
 * Map an ioctl command number to its handler function.
 *
 * @cmd:         command number (the _IOC_NR() part of the ioctl)
 * @ioctl_flags: out: IOCTL_FLAGS_* for the command; only written when
 *               @cmd is within the table
 *
 * Returns the handler, or NULL if @cmd is out of range or the command
 * has no handler in this table.
 */
static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
{
	/*
	 * The table is indexed directly by the command number, so each
	 * entry's position has to match its command number.
	 */
	static const struct {
		int cmd;
		int flags;
		ioctl_fn fn;
	} _ioctls[] = {
		{DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */
		{DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, remove_all},
		{DM_LIST_DEVICES_CMD, 0, list_devices},

		{DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_create},
		{DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_remove},
		{DM_DEV_RENAME_CMD, IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_rename},
		{DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend},
		{DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status},
		{DM_DEV_WAIT_CMD, 0, dev_wait},

		{DM_TABLE_LOAD_CMD, 0, table_load},
		{DM_TABLE_CLEAR_CMD, IOCTL_FLAGS_NO_PARAMS, table_clear},
		{DM_TABLE_DEPS_CMD, 0, table_deps},
		{DM_TABLE_STATUS_CMD, 0, table_status},

		{DM_LIST_VERSIONS_CMD, 0, list_versions},

		{DM_TARGET_MSG_CMD, 0, target_message},
		{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
		{DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
		{DM_GET_TARGET_VERSION_CMD, 0, get_target_version},
		{DM_MPATH_PROBE_PATHS_CMD, 0, NULL}, /* block device ioctl */
	};

	if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
		return NULL;

	/* Clamp the index under speculation before using it to read the table. */
	cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls));
	*ioctl_flags = _ioctls[cmd].flags;
	return _ioctls[cmd].fn;
}
2037 
2038 /*
2039  * As well as checking the version compatibility this always
2040  * copies the kernel interface version out.
2041  */
2042 static int check_version(unsigned int cmd, struct dm_ioctl __user *user,
2043 			 struct dm_ioctl *kernel_params)
2044 {
2045 	int r = 0;
2046 
2047 	/* Make certain version is first member of dm_ioctl struct */
2048 	BUILD_BUG_ON(offsetof(struct dm_ioctl, version) != 0);
2049 
2050 	if (copy_from_user(kernel_params->version, user->version, sizeof(kernel_params->version)))
2051 		return -EFAULT;
2052 
2053 	if ((kernel_params->version[0] != DM_VERSION_MAJOR) ||
2054 	    (kernel_params->version[1] > DM_VERSION_MINOR)) {
2055 		DMERR_LIMIT("ioctl interface mismatch: kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
2056 		      DM_VERSION_MAJOR, DM_VERSION_MINOR,
2057 		      DM_VERSION_PATCHLEVEL,
2058 		      kernel_params->version[0],
2059 		      kernel_params->version[1],
2060 		      kernel_params->version[2],
2061 		      cmd);
2062 		r = -EINVAL;
2063 	}
2064 
2065 	/*
2066 	 * Fill in the kernel version.
2067 	 */
2068 	kernel_params->version[0] = DM_VERSION_MAJOR;
2069 	kernel_params->version[1] = DM_VERSION_MINOR;
2070 	kernel_params->version[2] = DM_VERSION_PATCHLEVEL;
2071 	if (copy_to_user(user->version, kernel_params->version, sizeof(kernel_params->version)))
2072 		return -EFAULT;
2073 
2074 	return r;
2075 }
2076 
#define DM_PARAMS_MALLOC	0x0001	/* Params allocated with kvmalloc() */
#define DM_WIPE_BUFFER		0x0010	/* Wipe input buffer before returning from ioctl */

/*
 * Release an ioctl parameter block according to @param_flags: zero the
 * first @param_size bytes if DM_WIPE_BUFFER is set, then free the block
 * if DM_PARAMS_MALLOC is set.
 */
static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
{
	const bool wipe = param_flags & DM_WIPE_BUFFER;
	const bool allocated = param_flags & DM_PARAMS_MALLOC;

	if (wipe)
		memset(param, 0, param_size);

	if (allocated)
		kvfree(param);
}
2088 
2089 static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
2090 		       int ioctl_flags, struct dm_ioctl **param, int *param_flags)
2091 {
2092 	struct dm_ioctl *dmi;
2093 	int secure_data;
2094 	const size_t minimum_data_size = offsetof(struct dm_ioctl, data);
2095 
2096 	/* check_version() already copied version from userspace, avoid TOCTOU */
2097 	if (copy_from_user((char *)param_kernel + sizeof(param_kernel->version),
2098 			   (char __user *)user + sizeof(param_kernel->version),
2099 			   minimum_data_size - sizeof(param_kernel->version)))
2100 		return -EFAULT;
2101 
2102 	if (unlikely(param_kernel->data_size < minimum_data_size) ||
2103 	    unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) {
2104 		DMERR_LIMIT("Invalid data size in the ioctl structure: %u",
2105 		      param_kernel->data_size);
2106 		return -EINVAL;
2107 	}
2108 
2109 	secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG;
2110 
2111 	*param_flags = secure_data ? DM_WIPE_BUFFER : 0;
2112 
2113 	if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) {
2114 		dmi = param_kernel;
2115 		dmi->data_size = minimum_data_size;
2116 		goto data_copied;
2117 	}
2118 
2119 	/*
2120 	 * Use __GFP_HIGH to avoid low memory issues when a device is
2121 	 * suspended and the ioctl is needed to resume it.
2122 	 * Use kmalloc() rather than vmalloc() when we can.
2123 	 */
2124 	dmi = NULL;
2125 	dmi = kvmalloc(param_kernel->data_size, GFP_NOIO | __GFP_HIGH);
2126 
2127 	if (!dmi) {
2128 		if (secure_data && clear_user(user, param_kernel->data_size))
2129 			return -EFAULT;
2130 		return -ENOMEM;
2131 	}
2132 
2133 	*param_flags |= DM_PARAMS_MALLOC;
2134 
2135 	/* Copy from param_kernel (which was already copied from user) */
2136 	memcpy(dmi, param_kernel, minimum_data_size);
2137 
2138 	if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size,
2139 			   param_kernel->data_size - minimum_data_size))
2140 		goto bad;
2141 data_copied:
2142 	/* Wipe the user buffer so we do not return it to userspace */
2143 	if (secure_data && clear_user(user, param_kernel->data_size))
2144 		goto bad;
2145 
2146 	*param = dmi;
2147 	return 0;
2148 
2149 bad:
2150 	free_params(dmi, param_kernel->data_size, *param_flags);
2151 
2152 	return -EFAULT;
2153 }
2154 
2155 static int validate_params(uint cmd, struct dm_ioctl *param)
2156 {
2157 	/* Always clear this flag */
2158 	param->flags &= ~DM_BUFFER_FULL_FLAG;
2159 	param->flags &= ~DM_UEVENT_GENERATED_FLAG;
2160 	param->flags &= ~DM_SECURE_DATA_FLAG;
2161 	param->flags &= ~DM_DATA_OUT_FLAG;
2162 
2163 	/* Ignores parameters */
2164 	if (cmd == DM_REMOVE_ALL_CMD ||
2165 	    cmd == DM_LIST_DEVICES_CMD ||
2166 	    cmd == DM_LIST_VERSIONS_CMD)
2167 		return 0;
2168 
2169 	if (cmd == DM_DEV_CREATE_CMD) {
2170 		if (!*param->name) {
2171 			DMERR("name not supplied when creating device");
2172 			return -EINVAL;
2173 		}
2174 	} else if (*param->uuid && *param->name) {
2175 		DMERR("only supply one of name or uuid, cmd(%u)", cmd);
2176 		return -EINVAL;
2177 	}
2178 
2179 	/* Ensure strings are terminated */
2180 	param->name[DM_NAME_LEN - 1] = '\0';
2181 	param->uuid[DM_UUID_LEN - 1] = '\0';
2182 
2183 	return 0;
2184 }
2185 
/*
 * Top-level handler for ioctls on the control device.
 *
 * Checks the caller's capability and the ioctl type, validates the
 * interface version, looks up the handler for the command, copies the
 * parameters into kernel space, runs the handler and copies the results
 * back to userland.
 *
 * Returns 0 on success, -EACCES without CAP_SYS_ADMIN, -ENOTTY for a
 * foreign ioctl type or a command without a handler, -EFAULT if the
 * results cannot be copied out, or the error from check_version(),
 * copy_params(), validate_params() or the handler.
 */
static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *user)
{
	int r = 0;
	int ioctl_flags;
	int param_flags;
	unsigned int cmd;
	struct dm_ioctl *param;
	ioctl_fn fn = NULL;
	size_t input_param_size;
	struct dm_ioctl param_kernel;

	/* only root can play with this */
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (_IOC_TYPE(command) != DM_IOCTL)
		return -ENOTTY;

	cmd = _IOC_NR(command);

	/*
	 * Check the interface version passed in.  This also
	 * writes out the kernel's interface version.
	 */
	r = check_version(cmd, user, &param_kernel);
	if (r)
		return r;

	/*
	 * Nothing more to do for the version command.
	 */
	if (cmd == DM_VERSION_CMD)
		return 0;

	fn = lookup_ioctl(cmd, &ioctl_flags);
	if (!fn) {
		DMERR("dm_ctl_ioctl: unknown command 0x%x", command);
		return -ENOTTY;
	}

	/*
	 * Copy the parameters into kernel space.
	 */
	r = copy_params(user, &param_kernel, ioctl_flags, &param, &param_flags);

	if (r)
		return r;

	/* Remember the input size: data_size is reused below for the output size. */
	input_param_size = param->data_size;
	r = validate_params(cmd, param);
	if (r)
		goto out;

	/* By default only the fixed header is returned; handlers may enlarge this. */
	param->data_size = offsetof(struct dm_ioctl, data);
	r = fn(file, param, input_param_size);

	if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) &&
	    unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
		DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd);

	if (!r && ioctl_flags & IOCTL_FLAGS_ISSUE_GLOBAL_EVENT)
		dm_issue_global_event();

	/*
	 * Copy the results back to userland.
	 */
	if (!r && copy_to_user(user, param, param->data_size))
		r = -EFAULT;

out:
	free_params(param, input_param_size, param_flags);
	return r;
}
2259 
2260 static long dm_ctl_ioctl(struct file *file, uint command, ulong u)
2261 {
2262 	return (long)ctl_ioctl(file, command, (struct dm_ioctl __user *)u);
2263 }
2264 
#ifdef CONFIG_COMPAT
/*
 * compat_ioctl entry point: convert the compat pointer with compat_ptr()
 * and forward to dm_ctl_ioctl().
 */
static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u)
{
	ulong native_arg = (ulong) compat_ptr(u);

	return (long)dm_ctl_ioctl(file, command, native_arg);
}
#else
#define dm_compat_ctl_ioctl NULL
#endif
2273 
2274 static int dm_open(struct inode *inode, struct file *filp)
2275 {
2276 	int r;
2277 	struct dm_file *priv;
2278 
2279 	r = nonseekable_open(inode, filp);
2280 	if (unlikely(r))
2281 		return r;
2282 
2283 	priv = filp->private_data = kmalloc_obj(struct dm_file);
2284 	if (!priv)
2285 		return -ENOMEM;
2286 
2287 	priv->global_event_nr = atomic_read(&dm_global_event_nr);
2288 
2289 	return 0;
2290 }
2291 
2292 static int dm_release(struct inode *inode, struct file *filp)
2293 {
2294 	kfree(filp->private_data);
2295 	return 0;
2296 }
2297 
2298 static __poll_t dm_poll(struct file *filp, poll_table *wait)
2299 {
2300 	struct dm_file *priv = filp->private_data;
2301 	__poll_t mask = 0;
2302 
2303 	poll_wait(filp, &dm_global_eventq, wait);
2304 
2305 	if ((int)(atomic_read(&dm_global_event_nr) - priv->global_event_nr) > 0)
2306 		mask |= EPOLLIN;
2307 
2308 	return mask;
2309 }
2310 
2311 static const struct file_operations _ctl_fops = {
2312 	.open    = dm_open,
2313 	.release = dm_release,
2314 	.poll    = dm_poll,
2315 	.unlocked_ioctl	 = dm_ctl_ioctl,
2316 	.compat_ioctl = dm_compat_ctl_ioctl,
2317 	.owner	 = THIS_MODULE,
2318 	.llseek  = noop_llseek,
2319 };
2320 
2321 static struct miscdevice _dm_misc = {
2322 	.minor		= MAPPER_CTRL_MINOR,
2323 	.name		= DM_NAME,
2324 	.nodename	= DM_DIR "/" DM_CONTROL_NODE,
2325 	.fops		= &_ctl_fops
2326 };
2327 
2328 MODULE_ALIAS_MISCDEV(MAPPER_CTRL_MINOR);
2329 MODULE_ALIAS("devname:" DM_DIR "/" DM_CONTROL_NODE);
2330 
2331 /*
2332  * Create misc character device and link to DM_DIR/control.
2333  */
2334 int __init dm_interface_init(void)
2335 {
2336 	int r;
2337 
2338 	r = misc_register(&_dm_misc);
2339 	if (r) {
2340 		DMERR("misc_register failed for control device");
2341 		return r;
2342 	}
2343 
2344 	DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
2345 	       DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
2346 	       DM_DRIVER_EMAIL);
2347 	return 0;
2348 }
2349 
2350 void dm_interface_exit(void)
2351 {
2352 	misc_deregister(&_dm_misc);
2353 	dm_hash_exit();
2354 }
2355 
2356 /**
2357  * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers
2358  * @md: Pointer to mapped_device
2359  * @name: Buffer (size DM_NAME_LEN) for name
2360  * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined
2361  */
2362 int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
2363 {
2364 	int r = 0;
2365 	struct hash_cell *hc;
2366 
2367 	if (!md)
2368 		return -ENXIO;
2369 
2370 	mutex_lock(&dm_hash_cells_mutex);
2371 	hc = dm_get_mdptr(md);
2372 	if (!hc) {
2373 		r = -ENXIO;
2374 		goto out;
2375 	}
2376 
2377 	if (name)
2378 		strcpy(name, hc->name);
2379 	if (uuid)
2380 		strcpy(uuid, hc->uuid ? : "");
2381 
2382 out:
2383 	mutex_unlock(&dm_hash_cells_mutex);
2384 
2385 	return r;
2386 }
2387 EXPORT_SYMBOL_GPL(dm_copy_name_and_uuid);
2388 
2389 /**
2390  * dm_early_create - create a mapped device in early boot.
2391  *
2392  * @dmi: Contains main information of the device mapping to be created.
2393  * @spec_array: array of pointers to struct dm_target_spec. Describes the
2394  * mapping table of the device.
2395  * @target_params_array: array of strings with the parameters to a specific
2396  * target.
2397  *
2398  * Instead of having the struct dm_target_spec and the parameters for every
2399  * target embedded at the end of struct dm_ioctl (as performed in a normal
2400  * ioctl), pass them as arguments, so the caller doesn't need to serialize them.
2401  * The size of the spec_array and target_params_array is given by
2402  * @dmi->target_count.
2403  * This function is supposed to be called in early boot, so locking mechanisms
2404  * to protect against concurrent loads are not required.
2405  */
2406 int __init dm_early_create(struct dm_ioctl *dmi,
2407 			   struct dm_target_spec **spec_array,
2408 			   char **target_params_array)
2409 {
2410 	int r, m = DM_ANY_MINOR;
2411 	struct dm_table *t, *old_map;
2412 	struct mapped_device *md;
2413 	unsigned int i;
2414 
2415 	if (!dmi->target_count)
2416 		return -EINVAL;
2417 
2418 	r = check_name(dmi->name);
2419 	if (r)
2420 		return r;
2421 
2422 	if (dmi->flags & DM_PERSISTENT_DEV_FLAG)
2423 		m = MINOR(huge_decode_dev(dmi->dev));
2424 
2425 	/* alloc dm device */
2426 	r = dm_create(m, &md);
2427 	if (r)
2428 		return r;
2429 
2430 	/* hash insert */
2431 	r = dm_hash_insert(dmi->name, *dmi->uuid ? dmi->uuid : NULL, md);
2432 	if (r)
2433 		goto err_destroy_dm;
2434 
2435 	/* alloc table */
2436 	r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md);
2437 	if (r)
2438 		goto err_hash_remove;
2439 
2440 	/* add targets */
2441 	for (i = 0; i < dmi->target_count; i++) {
2442 		r = dm_table_add_target(t, spec_array[i]->target_type,
2443 					(sector_t) spec_array[i]->sector_start,
2444 					(sector_t) spec_array[i]->length,
2445 					target_params_array[i]);
2446 		if (r) {
2447 			DMERR("error adding target to table");
2448 			goto err_destroy_table;
2449 		}
2450 	}
2451 
2452 	/* finish table */
2453 	r = dm_table_complete(t);
2454 	if (r)
2455 		goto err_destroy_table;
2456 
2457 	/* setup md->queue to reflect md's type (may block) */
2458 	r = dm_setup_md_queue(md, t);
2459 	if (r) {
2460 		DMERR("unable to set up device queue for new table.");
2461 		goto err_destroy_table;
2462 	}
2463 
2464 	/* Set new map */
2465 	dm_suspend(md, 0);
2466 	old_map = dm_swap_table(md, t);
2467 	if (IS_ERR(old_map)) {
2468 		r = PTR_ERR(old_map);
2469 		goto err_destroy_table;
2470 	}
2471 	set_disk_ro(dm_disk(md), !!(dmi->flags & DM_READONLY_FLAG));
2472 
2473 	/* resume device */
2474 	r = dm_resume(md);
2475 	if (r)
2476 		goto err_destroy_table;
2477 
2478 	DMINFO("%s (%s) is ready", md->disk->disk_name, dmi->name);
2479 	dm_put(md);
2480 	return 0;
2481 
2482 err_destroy_table:
2483 	dm_table_destroy(t);
2484 err_hash_remove:
2485 	down_write(&_hash_lock);
2486 	(void) __hash_remove(__get_name_cell(dmi->name));
2487 	up_write(&_hash_lock);
2488 	/* release reference from __get_name_cell */
2489 	dm_put(md);
2490 err_destroy_dm:
2491 	dm_put(md);
2492 	dm_destroy(md);
2493 	return r;
2494 }
2495