xref: /freebsd/usr.sbin/mfiutil/mfi_config.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 2008, 2009 Yahoo!, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The names of the authors may not be used to endorse or promote
14  *    products derived from this software without specific prior written
15  *    permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
#include <sys/param.h>
#ifdef DEBUG
#include <sys/sysctl.h>
#endif
#include <err.h>
#include <errno.h>
#include <libutil.h>
#include <limits.h>
#ifdef DEBUG
#include <stdint.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "mfiutil.h"
47 
48 #ifdef DEBUG
49 static void	dump_config(int fd, struct mfi_config_data *config);
50 #endif
51 
52 static int	add_spare(int ac, char **av);
53 static int	remove_spare(int ac, char **av);
54 
/*
 * Convert a human-readable size such as "512", "64k", "1M" or "0x200"
 * into a plain number.
 *
 * The number is parsed by strtol() with base auto-detection and may be
 * followed by at most one binary scale suffix: k, m, g or t in either
 * case, each step multiplying by 1024.
 *
 * Returns the scaled value, or 0 when 'value' is NULL, does not start
 * with a number, has an unknown suffix, has trailing characters after the
 * suffix, or the result does not fit in a long.
 */
static long
dehumanize(const char *value)
{
	char *vtp;
	long iv;
	int scale;

	if (value == NULL)
		return (0);

	errno = 0;
	iv = strtol(value, &vtp, 0);
	if (vtp == value || errno == ERANGE)
		return (0);
	/* Allow nothing, or exactly one suffix character, after the number. */
	if (vtp[0] != '\0' && vtp[1] != '\0')
		return (0);

	/* Translate the suffix into a count of multiplications by 1024. */
	switch (vtp[0]) {
	case 't': case 'T':
		scale = 4;
		break;
	case 'g': case 'G':
		scale = 3;
		break;
	case 'm': case 'M':
		scale = 2;
		break;
	case 'k': case 'K':
		scale = 1;
		break;
	case '\0':
		scale = 0;
		break;
	default:
		return (0);
	}

	/* Scale one step at a time, refusing any step that would overflow. */
	while (scale-- > 0) {
		if (iv > LONG_MAX / 1024 || iv < LONG_MIN / 1024)
			return (0);
		iv *= 1024;
	}
	return (iv);
}
83 int
84 mfi_config_read(int fd, struct mfi_config_data **configp)
85 {
86 	struct mfi_config_data *config;
87 	uint32_t config_size;
88 
89 	/*
90 	 * Keep fetching the config in a loop until we have a large enough
91 	 * buffer to hold the entire configuration.
92 	 */
93 	config = NULL;
94 	config_size = 1024;
95 fetch:
96 	config = reallocf(config, config_size);
97 	if (config == NULL)
98 		return (-1);
99 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_READ, config,
100 	    config_size, NULL, 0, NULL) < 0)
101 		return (-1);
102 
103 	if (config->size > config_size) {
104 		config_size = config->size;
105 		goto fetch;
106 	}
107 
108 	*configp = config;
109 	return (0);
110 }
111 
112 static struct mfi_array *
113 mfi_config_lookup_array(struct mfi_config_data *config, uint16_t array_ref)
114 {
115 	struct mfi_array *ar;
116 	char *p;
117 	int i;
118 
119 	p = (char *)config->array;
120 	for (i = 0; i < config->array_count; i++) {
121 		ar = (struct mfi_array *)p;
122 		if (ar->array_ref == array_ref)
123 			return (ar);
124 		p += config->array_size;
125 	}
126 
127 	return (NULL);
128 }
129 
130 static struct mfi_ld_config *
131 mfi_config_lookup_volume(struct mfi_config_data *config, uint8_t target_id)
132 {
133 	struct mfi_ld_config *ld;
134 	char *p;
135 	int i;
136 
137 	p = (char *)config->array + config->array_count * config->array_size;
138 	for (i = 0; i < config->log_drv_count; i++) {
139 		ld = (struct mfi_ld_config *)p;
140 		if (ld->properties.ld.v.target_id == target_id)
141 			return (ld);
142 		p += config->log_drv_size;
143 	}
144 
145 	return (NULL);
146 }
147 
148 static int
149 clear_config(int ac, char **av)
150 {
151 	struct mfi_ld_list list;
152 	int ch, error, fd;
153 	u_int i;
154 
155 	fd = mfi_open(mfi_unit);
156 	if (fd < 0) {
157 		error = errno;
158 		warn("mfi_open");
159 		return (error);
160 	}
161 
162 	if (!mfi_reconfig_supported()) {
163 		warnx("The current mfi(4) driver does not support "
164 		    "configuration changes.");
165 		return (EOPNOTSUPP);
166 	}
167 
168 	if (mfi_ld_get_list(fd, &list, NULL) < 0) {
169 		error = errno;
170 		warn("Failed to get volume list");
171 		return (error);
172 	}
173 
174 	for (i = 0; i < list.ld_count; i++) {
175 		if (mfi_volume_busy(fd, list.ld_list[i].ld.v.target_id)) {
176 			warnx("Volume %s is busy and cannot be deleted",
177 			    mfi_volume_name(fd, list.ld_list[i].ld.v.target_id));
178 			return (EBUSY);
179 		}
180 	}
181 
182 	printf(
183 	    "Are you sure you wish to clear the configuration on mfi%u? [y/N] ",
184 	    mfi_unit);
185 	ch = getchar();
186 	if (ch != 'y' && ch != 'Y') {
187 		printf("\nAborting\n");
188 		return (0);
189 	}
190 
191 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_CLEAR, NULL, 0, NULL, 0, NULL) < 0) {
192 		error = errno;
193 		warn("Failed to clear configuration");
194 		return (error);
195 	}
196 
197 	printf("mfi%d: Configuration cleared\n", mfi_unit);
198 	close(fd);
199 
200 	return (0);
201 }
202 MFI_COMMAND(top, clear, clear_config);
203 
/*
 * Size in bytes of one array entry in a configuration.  create_volume()
 * stores it in config->array_size and uses it to size the configuration
 * buffer it builds.
 */
#define	MFI_ARRAY_SIZE		288
/*
 * Number of 8-byte slots left in an array entry after struct mfi_array.
 * NOTE(review): 8 is presumably the size of one pd[] slot -- confirm
 * against the definition of struct mfi_array.
 */
#define	MAX_DRIVES_PER_ARRAY						\
	((MFI_ARRAY_SIZE - sizeof(struct mfi_array)) / 8)

/* Volume types understood by the "create" command (see raid_type_table). */
#define	RT_RAID0	0
#define	RT_RAID1	1
#define	RT_RAID5	2
#define	RT_RAID6	3
#define	RT_JBOD		4
#define	RT_CONCAT	5
#define	RT_RAID10	6
#define	RT_RAID50	7
#define	RT_RAID60	8
217 
/*
 * qsort() comparison callback ordering two ints in ascending order.
 *
 * Returns a negative value, zero or a positive value when '*one' is
 * less than, equal to or greater than '*two'.  The result is derived
 * from comparisons rather than subtraction so that it cannot overflow
 * for operands that are far apart.
 */
static int
compare_int(const void *one, const void *two)
{
	int first, second;

	first = *(const int *)one;
	second = *(const int *)two;

	return ((first > second) - (first < second));
}
228 
229 static struct raid_type_entry {
230 	const char *name;
231 	int	raid_type;
232 } raid_type_table[] = {
233 	{ "raid0",	RT_RAID0 },
234 	{ "raid-0",	RT_RAID0 },
235 	{ "raid1",	RT_RAID1 },
236 	{ "raid-1",	RT_RAID1 },
237 	{ "mirror",	RT_RAID1 },
238 	{ "raid5",	RT_RAID5 },
239 	{ "raid-5",	RT_RAID5 },
240 	{ "raid6",	RT_RAID6 },
241 	{ "raid-6",	RT_RAID6 },
242 	{ "jbod",	RT_JBOD },
243 	{ "concat",	RT_CONCAT },
244 	{ "raid10",	RT_RAID10 },
245 	{ "raid1+0",	RT_RAID10 },
246 	{ "raid-10",	RT_RAID10 },
247 	{ "raid-1+0",	RT_RAID10 },
248 	{ "raid50",	RT_RAID50 },
249 	{ "raid5+0",	RT_RAID50 },
250 	{ "raid-50",	RT_RAID50 },
251 	{ "raid-5+0",	RT_RAID50 },
252 	{ "raid60",	RT_RAID60 },
253 	{ "raid6+0",	RT_RAID60 },
254 	{ "raid-60",	RT_RAID60 },
255 	{ "raid-6+0",	RT_RAID60 },
256 	{ NULL,		0 },
257 };
258 
/*
 * Bookkeeping used while building a new configuration: the identifiers
 * already in use on the controller and the last identifier handed out
 * by find_next_array() / find_next_volume().
 */
struct config_id_state {
	int	array_count;	/* number of entries in 'arrays' */
	int	log_drv_count;	/* number of entries in 'volumes' */
	int	*arrays;	/* sorted array refs in use, or NULL if none */
	int	*volumes;	/* sorted target IDs in use, or NULL if none */
	uint16_t array_ref;	/* last array ref used; starts at 0xffff */
	uint8_t	target_id;	/* last target ID used; starts at 0xff */
};
267 
/* One drive list from the command line and the array entry built from it. */
struct array_info {
	int	drive_count;	/* number of entries in 'drives' */
	struct mfi_pd_info *drives;	/* allocated and filled by parse_array() */
	struct mfi_array *array;	/* set by build_array() */
};
273 
274 /* Parse a comma-separated list of drives for an array. */
275 static int
276 parse_array(int fd, int raid_type, char *array_str, struct array_info *info)
277 {
278 	struct mfi_pd_info *pinfo;
279 	uint16_t device_id;
280 	char *cp;
281 	u_int count;
282 	int error;
283 
284 	cp = array_str;
285 	for (count = 0; cp != NULL; count++) {
286 		cp = strchr(cp, ',');
287 		if (cp != NULL) {
288 			cp++;
289 			if (*cp == ',') {
290 				warnx("Invalid drive list '%s'", array_str);
291 				return (EINVAL);
292 			}
293 		}
294 	}
295 
296 	/* Validate the number of drives for this array. */
297 	if (count >= MAX_DRIVES_PER_ARRAY) {
298 		warnx("Too many drives for a single array: max is %zu",
299 		    MAX_DRIVES_PER_ARRAY);
300 		return (EINVAL);
301 	}
302 	switch (raid_type) {
303 	case RT_RAID1:
304 	case RT_RAID10:
305 		if (count % 2 != 0) {
306 			warnx("RAID1 and RAID10 require an even number of "
307 			    "drives in each array");
308 			return (EINVAL);
309 		}
310 		break;
311 	case RT_RAID5:
312 	case RT_RAID50:
313 		if (count < 3) {
314 			warnx("RAID5 and RAID50 require at least 3 drives in "
315 			    "each array");
316 			return (EINVAL);
317 		}
318 		break;
319 	case RT_RAID6:
320 	case RT_RAID60:
321 		if (count < 4) {
322 			warnx("RAID6 and RAID60 require at least 4 drives in "
323 			    "each array");
324 			return (EINVAL);
325 		}
326 		break;
327 	}
328 
329 	/* Validate each drive. */
330 	info->drives = calloc(count, sizeof(struct mfi_pd_info));
331 	if (info->drives == NULL) {
332 		warnx("malloc failed");
333 		return (ENOMEM);
334 	}
335 	info->drive_count = count;
336 	for (pinfo = info->drives; (cp = strsep(&array_str, ",")) != NULL;
337 	     pinfo++) {
338 		error = mfi_lookup_drive(fd, cp, &device_id);
339 		if (error)
340 			return (error);
341 
342 		if (mfi_pd_get_info(fd, device_id, pinfo, NULL) < 0) {
343 			error = errno;
344 			warn("Failed to fetch drive info for drive %s", cp);
345 			return (error);
346 		}
347 
348 		if (pinfo->fw_state != MFI_PD_STATE_UNCONFIGURED_GOOD) {
349 			warnx("Drive %u is not available", device_id);
350 			return (EINVAL);
351 		}
352 	}
353 
354 	return (0);
355 }
356 
357 /*
358  * Find the next free array ref assuming that 'array_ref' is the last
359  * one used.  'array_ref' should be 0xffff for the initial test.
360  */
361 static uint16_t
362 find_next_array(struct config_id_state *state)
363 {
364 	int i;
365 
366 	/* Assume the current one is used. */
367 	state->array_ref++;
368 
369 	/* Find the next free one. */
370 	for (i = 0; i < state->array_count; i++)
371 		if (state->arrays[i] == state->array_ref)
372 			state->array_ref++;
373 	return (state->array_ref);
374 }
375 
376 /*
377  * Find the next free volume ID assuming that 'target_id' is the last
378  * one used.  'target_id' should be 0xff for the initial test.
379  */
380 static uint8_t
381 find_next_volume(struct config_id_state *state)
382 {
383 	int i;
384 
385 	/* Assume the current one is used. */
386 	state->target_id++;
387 
388 	/* Find the next free one. */
389 	for (i = 0; i < state->log_drv_count; i++)
390 		if (state->volumes[i] == state->target_id)
391 			state->target_id++;
392 	return (state->target_id);
393 }
394 
395 /* Populate an array with drives. */
396 static void
397 build_array(int fd, char *arrayp, struct array_info *array_info,
398     struct config_id_state *state, int verbose)
399 {
400 	struct mfi_array *ar = (struct mfi_array *)arrayp;
401 	int i;
402 
403 	ar->size = array_info->drives[0].coerced_size;
404 	ar->num_drives = array_info->drive_count;
405 	ar->array_ref = find_next_array(state);
406 	for (i = 0; i < array_info->drive_count; i++) {
407 		if (verbose)
408 			printf("Adding drive %u to array %u\n",
409 			    array_info->drives[i].ref.v.device_id,
410 			    ar->array_ref);
411 		if (ar->size > array_info->drives[i].coerced_size)
412 			ar->size = array_info->drives[i].coerced_size;
413 		ar->pd[i].ref = array_info->drives[i].ref;
414 		ar->pd[i].fw_state = MFI_PD_STATE_ONLINE;
415 	}
416 	array_info->array = ar;
417 }
418 
419 /*
420  * Create a volume that spans one or more arrays.
421  */
422 static void
423 build_volume(char *volumep, int narrays, struct array_info *arrays,
424     int raid_type, long stripe_size, struct config_id_state *state, int verbose)
425 {
426 	struct mfi_ld_config *ld = (struct mfi_ld_config *)volumep;
427 	struct mfi_array *ar;
428 	int i;
429 
430 	/* properties */
431 	ld->properties.ld.v.target_id = find_next_volume(state);
432 	ld->properties.ld.v.seq = 0;
433 	ld->properties.default_cache_policy = MR_LD_CACHE_ALLOW_WRITE_CACHE |
434 	    MR_LD_CACHE_WRITE_BACK;
435 	ld->properties.access_policy = MFI_LD_ACCESS_RW;
436 	ld->properties.disk_cache_policy = MR_PD_CACHE_UNCHANGED;
437 	ld->properties.current_cache_policy = MR_LD_CACHE_ALLOW_WRITE_CACHE |
438 	    MR_LD_CACHE_WRITE_BACK;
439 	ld->properties.no_bgi = 0;
440 
441 	/* params */
442 	switch (raid_type) {
443 	case RT_RAID0:
444 	case RT_JBOD:
445 		ld->params.primary_raid_level = DDF_RAID0;
446 		ld->params.raid_level_qualifier = 0;
447 		ld->params.secondary_raid_level = 0;
448 		break;
449 	case RT_RAID1:
450 		ld->params.primary_raid_level = DDF_RAID1;
451 		ld->params.raid_level_qualifier = 0;
452 		ld->params.secondary_raid_level = 0;
453 		break;
454 	case RT_RAID5:
455 		ld->params.primary_raid_level = DDF_RAID5;
456 		ld->params.raid_level_qualifier = 3;
457 		ld->params.secondary_raid_level = 0;
458 		break;
459 	case RT_RAID6:
460 		ld->params.primary_raid_level = DDF_RAID6;
461 		ld->params.raid_level_qualifier = 3;
462 		ld->params.secondary_raid_level = 0;
463 		break;
464 	case RT_CONCAT:
465 		ld->params.primary_raid_level = DDF_CONCAT;
466 		ld->params.raid_level_qualifier = 0;
467 		ld->params.secondary_raid_level = 0;
468 		break;
469 	case RT_RAID10:
470 		ld->params.primary_raid_level = DDF_RAID1;
471 		ld->params.raid_level_qualifier = 0;
472 		ld->params.secondary_raid_level = 3; /* XXX? */
473 		break;
474 	case RT_RAID50:
475 		/*
476 		 * XXX: This appears to work though the card's BIOS
477 		 * complains that the configuration is foreign.  The
478 		 * BIOS setup does not allow for creation of RAID-50
479 		 * or RAID-60 arrays.  The only nested array
480 		 * configuration it allows for is RAID-10.
481 		 */
482 		ld->params.primary_raid_level = DDF_RAID5;
483 		ld->params.raid_level_qualifier = 3;
484 		ld->params.secondary_raid_level = 3; /* XXX? */
485 		break;
486 	case RT_RAID60:
487 		ld->params.primary_raid_level = DDF_RAID6;
488 		ld->params.raid_level_qualifier = 3;
489 		ld->params.secondary_raid_level = 3; /* XXX? */
490 		break;
491 	}
492 
493 	/*
494 	 * Stripe size is encoded as (2 ^ N) * 512 = stripe_size.  Use
495 	 * ffs() to simulate log2(stripe_size).
496 	 */
497 	ld->params.stripe_size = ffs(stripe_size) - 1 - 9;
498 	ld->params.num_drives = arrays[0].array->num_drives;
499 	ld->params.span_depth = narrays;
500 	ld->params.state = MFI_LD_STATE_OPTIMAL;
501 	ld->params.init_state = MFI_LD_PARAMS_INIT_NO;
502 	ld->params.is_consistent = 0;
503 
504 	/* spans */
505 	for (i = 0; i < narrays; i++) {
506 		ar = arrays[i].array;
507 		if (verbose)
508 			printf("Adding array %u to volume %u\n", ar->array_ref,
509 			    ld->properties.ld.v.target_id);
510 		ld->span[i].start_block = 0;
511 		ld->span[i].num_blocks = ar->size;
512 		ld->span[i].array_ref = ar->array_ref;
513 	}
514 }
515 
516 static int
517 create_volume(int ac, char **av)
518 {
519 	struct mfi_config_data *config;
520 	struct mfi_array *ar;
521 	struct mfi_ld_config *ld;
522 	struct config_id_state state;
523 	size_t config_size;
524 	char *p, *cfg_arrays, *cfg_volumes;
525 	int error, fd, i, raid_type;
526 	int narrays, nvolumes, arrays_per_volume;
527 	struct array_info *arrays;
528 	long stripe_size;
529 #ifdef DEBUG
530 	int dump;
531 #endif
532 	int ch, verbose;
533 
534 	/*
535 	 * Backwards compat.  Map 'create volume' to 'create' and
536 	 * 'create spare' to 'add'.
537 	 */
538 	if (ac > 1) {
539 		if (strcmp(av[1], "volume") == 0) {
540 			av++;
541 			ac--;
542 		} else if (strcmp(av[1], "spare") == 0) {
543 			av++;
544 			ac--;
545 			return (add_spare(ac, av));
546 		}
547 	}
548 
549 	if (ac < 2) {
550 		warnx("create volume: volume type required");
551 		return (EINVAL);
552 	}
553 
554 
555 	fd = mfi_open(mfi_unit);
556 	if (fd < 0) {
557 		error = errno;
558 		warn("mfi_open");
559 		return (error);
560 	}
561 
562 	if (!mfi_reconfig_supported()) {
563 		warnx("The current mfi(4) driver does not support "
564 		    "configuration changes.");
565 		return (EOPNOTSUPP);
566 	}
567 
568 	/* Lookup the RAID type first. */
569 	raid_type = -1;
570 	for (i = 0; raid_type_table[i].name != NULL; i++)
571 		if (strcasecmp(raid_type_table[i].name, av[1]) == 0) {
572 			raid_type = raid_type_table[i].raid_type;
573 			break;
574 		}
575 
576 	if (raid_type == -1) {
577 		warnx("Unknown or unsupported volume type %s", av[1]);
578 		return (EINVAL);
579 	}
580 
581 	/* Parse any options. */
582 	optind = 2;
583 #ifdef DEBUG
584 	dump = 0;
585 #endif
586 	verbose = 0;
587 	stripe_size = 64 * 1024;
588 
589 	while ((ch = getopt(ac, av, "ds:v")) != -1) {
590 		switch (ch) {
591 #ifdef DEBUG
592 		case 'd':
593 			dump = 1;
594 			break;
595 #endif
596 		case 's':
597 			stripe_size = dehumanize(optarg);
598 			if ((stripe_size < 512) || (!powerof2(stripe_size)))
599 				stripe_size = 64 * 1024;
600 			break;
601 		case 'v':
602 			verbose = 1;
603 			break;
604 		case '?':
605 		default:
606 			return (EINVAL);
607 		}
608 	}
609 	ac -= optind;
610 	av += optind;
611 
612 	/* Parse all the arrays. */
613 	narrays = ac;
614 	if (narrays == 0) {
615 		warnx("At least one drive list is required");
616 		return (EINVAL);
617 	}
618 	switch (raid_type) {
619 	case RT_RAID0:
620 	case RT_RAID1:
621 	case RT_RAID5:
622 	case RT_RAID6:
623 	case RT_CONCAT:
624 		if (narrays != 1) {
625 			warnx("Only one drive list can be specified");
626 			return (EINVAL);
627 		}
628 		break;
629 	case RT_RAID10:
630 	case RT_RAID50:
631 	case RT_RAID60:
632 		if (narrays < 1) {
633 			warnx("RAID10, RAID50, and RAID60 require at least "
634 			    "two drive lists");
635 			return (EINVAL);
636 		}
637 		if (narrays > MFI_MAX_SPAN_DEPTH) {
638 			warnx("Volume spans more than %d arrays",
639 			    MFI_MAX_SPAN_DEPTH);
640 			return (EINVAL);
641 		}
642 		break;
643 	}
644 	arrays = calloc(narrays, sizeof(*arrays));
645 	if (arrays == NULL) {
646 		warnx("malloc failed");
647 		return (ENOMEM);
648 	}
649 	for (i = 0; i < narrays; i++) {
650 		error = parse_array(fd, raid_type, av[i], &arrays[i]);
651 		if (error)
652 			return (error);
653 	}
654 
655 	switch (raid_type) {
656 	case RT_RAID10:
657 	case RT_RAID50:
658 	case RT_RAID60:
659 		for (i = 1; i < narrays; i++) {
660 			if (arrays[i].drive_count != arrays[0].drive_count) {
661 				warnx("All arrays must contain the same "
662 				    "number of drives");
663 				return (EINVAL);
664 			}
665 		}
666 		break;
667 	}
668 
669 	/*
670 	 * Fetch the current config and build sorted lists of existing
671 	 * array and volume identifiers.
672 	 */
673 	if (mfi_config_read(fd, &config) < 0) {
674 		error = errno;
675 		warn("Failed to read configuration");
676 		return (error);
677 	}
678 	p = (char *)config->array;
679 	state.array_ref = 0xffff;
680 	state.target_id = 0xff;
681 	state.array_count = config->array_count;
682 	if (config->array_count > 0) {
683 		state.arrays = calloc(config->array_count, sizeof(int));
684 		if (state.arrays == NULL) {
685 			warnx("malloc failed");
686 			return (ENOMEM);
687 		}
688 		for (i = 0; i < config->array_count; i++) {
689 			ar = (struct mfi_array *)p;
690 			state.arrays[i] = ar->array_ref;
691 			p += config->array_size;
692 		}
693 		qsort(state.arrays, config->array_count, sizeof(int),
694 		    compare_int);
695 	} else
696 		state.arrays = NULL;
697 	state.log_drv_count = config->log_drv_count;
698 	if (config->log_drv_count) {
699 		state.volumes = calloc(config->log_drv_count, sizeof(int));
700 		if (state.volumes == NULL) {
701 			warnx("malloc failed");
702 			return (ENOMEM);
703 		}
704 		for (i = 0; i < config->log_drv_count; i++) {
705 			ld = (struct mfi_ld_config *)p;
706 			state.volumes[i] = ld->properties.ld.v.target_id;
707 			p += config->log_drv_size;
708 		}
709 		qsort(state.volumes, config->log_drv_count, sizeof(int),
710 		    compare_int);
711 	} else
712 		state.volumes = NULL;
713 	free(config);
714 
715 	/* Determine the size of the configuration we will build. */
716 	switch (raid_type) {
717 	case RT_RAID0:
718 	case RT_RAID1:
719 	case RT_RAID5:
720 	case RT_RAID6:
721 	case RT_CONCAT:
722 	case RT_JBOD:
723 		/* Each volume spans a single array. */
724 		nvolumes = narrays;
725 		break;
726 	case RT_RAID10:
727 	case RT_RAID50:
728 	case RT_RAID60:
729 		/* A single volume spans multiple arrays. */
730 		nvolumes = 1;
731 		break;
732 	default:
733 		/* Pacify gcc. */
734 		abort();
735 	}
736 
737 	config_size = sizeof(struct mfi_config_data) +
738 	    sizeof(struct mfi_ld_config) * nvolumes + MFI_ARRAY_SIZE * narrays;
739 	config = calloc(1, config_size);
740 	if (config == NULL) {
741 		warnx("malloc failed");
742 		return (ENOMEM);
743 	}
744 	config->size = config_size;
745 	config->array_count = narrays;
746 	config->array_size = MFI_ARRAY_SIZE;	/* XXX: Firmware hardcode */
747 	config->log_drv_count = nvolumes;
748 	config->log_drv_size = sizeof(struct mfi_ld_config);
749 	config->spares_count = 0;
750 	config->spares_size = 40;		/* XXX: Firmware hardcode */
751 	cfg_arrays = (char *)config->array;
752 	cfg_volumes = cfg_arrays + config->array_size * narrays;
753 
754 	/* Build the arrays. */
755 	for (i = 0; i < narrays; i++) {
756 		build_array(fd, cfg_arrays, &arrays[i], &state, verbose);
757 		cfg_arrays += config->array_size;
758 	}
759 
760 	/* Now build the volume(s). */
761 	arrays_per_volume = narrays / nvolumes;
762 	for (i = 0; i < nvolumes; i++) {
763 		build_volume(cfg_volumes, arrays_per_volume,
764 		    &arrays[i * arrays_per_volume], raid_type, stripe_size,
765 		    &state, verbose);
766 		cfg_volumes += config->log_drv_size;
767 	}
768 
769 #ifdef DEBUG
770 	if (dump)
771 		dump_config(fd, config);
772 #endif
773 
774 	/* Send the new config to the controller. */
775 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_ADD, config, config_size,
776 	    NULL, 0, NULL) < 0) {
777 		error = errno;
778 		warn("Failed to add volume");
779 		return (error);
780 	}
781 
782 	/* Clean up. */
783 	free(config);
784 	if (state.log_drv_count > 0)
785 		free(state.volumes);
786 	if (state.array_count > 0)
787 		free(state.arrays);
788 	for (i = 0; i < narrays; i++)
789 		free(arrays[i].drives);
790 	free(arrays);
791 	close(fd);
792 
793 	return (0);
794 }
795 MFI_COMMAND(top, create, create_volume);
796 
797 static int
798 delete_volume(int ac, char **av)
799 {
800 	struct mfi_ld_info info;
801 	int error, fd;
802 	uint8_t target_id, mbox[4];
803 
804 	/*
805 	 * Backwards compat.  Map 'delete volume' to 'delete' and
806 	 * 'delete spare' to 'remove'.
807 	 */
808 	if (ac > 1) {
809 		if (strcmp(av[1], "volume") == 0) {
810 			av++;
811 			ac--;
812 		} else if (strcmp(av[1], "spare") == 0) {
813 			av++;
814 			ac--;
815 			return (remove_spare(ac, av));
816 		}
817 	}
818 
819 	if (ac != 2) {
820 		warnx("delete volume: volume required");
821 		return (EINVAL);
822 	}
823 
824 	fd = mfi_open(mfi_unit);
825 	if (fd < 0) {
826 		error = errno;
827 		warn("mfi_open");
828 		return (error);
829 	}
830 
831 	if (!mfi_reconfig_supported()) {
832 		warnx("The current mfi(4) driver does not support "
833 		    "configuration changes.");
834 		return (EOPNOTSUPP);
835 	}
836 
837 	if (mfi_lookup_volume(fd, av[1], &target_id) < 0) {
838 		error = errno;
839 		warn("Invalid volume %s", av[1]);
840 		return (error);
841 	}
842 
843 	if (mfi_ld_get_info(fd, target_id, &info, NULL) < 0) {
844 		error = errno;
845 		warn("Failed to get info for volume %d", target_id);
846 		return (error);
847 	}
848 
849 	if (mfi_volume_busy(fd, target_id)) {
850 		warnx("Volume %s is busy and cannot be deleted",
851 		    mfi_volume_name(fd, target_id));
852 		return (EBUSY);
853 	}
854 
855 	mbox_store_ldref(mbox, &info.ld_config.properties.ld);
856 	if (mfi_dcmd_command(fd, MFI_DCMD_LD_DELETE, NULL, 0, mbox,
857 	    sizeof(mbox), NULL) < 0) {
858 		error = errno;
859 		warn("Failed to delete volume");
860 		return (error);
861 	}
862 
863 	close(fd);
864 
865 	return (0);
866 }
867 MFI_COMMAND(top, delete, delete_volume);
868 
869 static int
870 add_spare(int ac, char **av)
871 {
872 	struct mfi_pd_info info;
873 	struct mfi_config_data *config;
874 	struct mfi_array *ar;
875 	struct mfi_ld_config *ld;
876 	struct mfi_spare *spare;
877 	uint16_t device_id;
878 	uint8_t target_id;
879 	char *p;
880 	int error, fd, i;
881 
882 	if (ac < 2) {
883 		warnx("add spare: drive required");
884 		return (EINVAL);
885 	}
886 
887 	fd = mfi_open(mfi_unit);
888 	if (fd < 0) {
889 		error = errno;
890 		warn("mfi_open");
891 		return (error);
892 	}
893 
894 	error = mfi_lookup_drive(fd, av[1], &device_id);
895 	if (error)
896 		return (error);
897 
898 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
899 		error = errno;
900 		warn("Failed to fetch drive info");
901 		return (error);
902 	}
903 
904 	if (info.fw_state != MFI_PD_STATE_UNCONFIGURED_GOOD) {
905 		warnx("Drive %u is not available", device_id);
906 		return (EINVAL);
907 	}
908 
909 	if (ac > 2) {
910 		if (mfi_lookup_volume(fd, av[2], &target_id) < 0) {
911 			error = errno;
912 			warn("Invalid volume %s", av[2]);
913 			return (error);
914 		}
915 	}
916 
917 	if (mfi_config_read(fd, &config) < 0) {
918 		error = errno;
919 		warn("Failed to read configuration");
920 		return (error);
921 	}
922 
923 	spare = malloc(sizeof(struct mfi_spare) + sizeof(uint16_t) *
924 	    config->array_count);
925 	if (spare == NULL) {
926 		warnx("malloc failed");
927 		return (ENOMEM);
928 	}
929 	bzero(spare, sizeof(struct mfi_spare));
930 	spare->ref = info.ref;
931 
932 	if (ac == 2) {
933 		/* Global spare backs all arrays. */
934 		p = (char *)config->array;
935 		for (i = 0; i < config->array_count; i++) {
936 			ar = (struct mfi_array *)p;
937 			if (ar->size > info.coerced_size) {
938 				warnx("Spare isn't large enough for array %u",
939 				    ar->array_ref);
940 				return (EINVAL);
941 			}
942 			p += config->array_size;
943 		}
944 		spare->array_count = 0;
945 	} else  {
946 		/*
947 		 * Dedicated spares only back the arrays for a
948 		 * specific volume.
949 		 */
950 		ld = mfi_config_lookup_volume(config, target_id);
951 		if (ld == NULL) {
952 			warnx("Did not find volume %d", target_id);
953 			return (EINVAL);
954 		}
955 
956 		spare->spare_type |= MFI_SPARE_DEDICATED;
957 		spare->array_count = ld->params.span_depth;
958 		for (i = 0; i < ld->params.span_depth; i++) {
959 			ar = mfi_config_lookup_array(config,
960 			    ld->span[i].array_ref);
961 			if (ar == NULL) {
962 				warnx("Missing array; inconsistent config?");
963 				return (ENXIO);
964 			}
965 			if (ar->size > info.coerced_size) {
966 				warnx("Spare isn't large enough for array %u",
967 				    ar->array_ref);
968 				return (EINVAL);
969 			}
970 			spare->array_ref[i] = ar->array_ref;
971 		}
972 	}
973 	free(config);
974 
975 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_MAKE_SPARE, spare,
976 	    sizeof(struct mfi_spare) + sizeof(uint16_t) * spare->array_count,
977 	    NULL, 0, NULL) < 0) {
978 		error = errno;
979 		warn("Failed to assign spare");
980 		return (error);
981 	}
982 
983 	close(fd);
984 
985 	return (0);
986 }
987 MFI_COMMAND(top, add, add_spare);
988 
989 static int
990 remove_spare(int ac, char **av)
991 {
992 	struct mfi_pd_info info;
993 	int error, fd;
994 	uint16_t device_id;
995 	uint8_t mbox[4];
996 
997 	if (ac != 2) {
998 		warnx("remove spare: drive required");
999 		return (EINVAL);
1000 	}
1001 
1002 	fd = mfi_open(mfi_unit);
1003 	if (fd < 0) {
1004 		error = errno;
1005 		warn("mfi_open");
1006 		return (error);
1007 	}
1008 
1009 	error = mfi_lookup_drive(fd, av[1], &device_id);
1010 	if (error)
1011 		return (error);
1012 
1013 	/* Get the info for this drive. */
1014 	if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) {
1015 		error = errno;
1016 		warn("Failed to fetch info for drive %u", device_id);
1017 		return (error);
1018 	}
1019 
1020 	if (info.fw_state != MFI_PD_STATE_HOT_SPARE) {
1021 		warnx("Drive %u is not a hot spare", device_id);
1022 		return (EINVAL);
1023 	}
1024 
1025 	mbox_store_pdref(mbox, &info.ref);
1026 	if (mfi_dcmd_command(fd, MFI_DCMD_CFG_REMOVE_SPARE, NULL, 0, mbox,
1027 	    sizeof(mbox), NULL) < 0) {
1028 		error = errno;
1029 		warn("Failed to delete spare");
1030 		return (error);
1031 	}
1032 
1033 	close(fd);
1034 
1035 	return (0);
1036 }
1037 MFI_COMMAND(top, remove, remove_spare);
1038 
1039 #ifdef DEBUG
1040 /* Display raw data about a config. */
1041 static void
1042 dump_config(int fd, struct mfi_config_data *config)
1043 {
1044 	struct mfi_array *ar;
1045 	struct mfi_ld_config *ld;
1046 	struct mfi_spare *sp;
1047 	struct mfi_pd_info pinfo;
1048 	uint16_t device_id;
1049 	char *p;
1050 	int i, j;
1051 
1052 	printf(
1053 	    "mfi%d Configuration (Debug): %d arrays, %d volumes, %d spares\n",
1054 	    mfi_unit, config->array_count, config->log_drv_count,
1055 	    config->spares_count);
1056 	printf("  array size: %u\n", config->array_size);
1057 	printf("  volume size: %u\n", config->log_drv_size);
1058 	printf("  spare size: %u\n", config->spares_size);
1059 	p = (char *)config->array;
1060 
1061 	for (i = 0; i < config->array_count; i++) {
1062 		ar = (struct mfi_array *)p;
1063 		printf("    array %u of %u drives:\n", ar->array_ref,
1064 		    ar->num_drives);
1065 		printf("      size = %ju\n", (uintmax_t)ar->size);
1066 		for (j = 0; j < ar->num_drives; j++) {
1067 			device_id = ar->pd[j].ref.v.device_id;
1068 			if (device_id == 0xffff)
1069 				printf("        drive MISSING\n");
1070 			else {
1071 				printf("        drive %u %s\n", device_id,
1072 				    mfi_pdstate(ar->pd[j].fw_state));
1073 				if (mfi_pd_get_info(fd, device_id, &pinfo,
1074 				    NULL) >= 0) {
1075 					printf("          raw size: %ju\n",
1076 					    (uintmax_t)pinfo.raw_size);
1077 					printf("          non-coerced size: %ju\n",
1078 					    (uintmax_t)pinfo.non_coerced_size);
1079 					printf("          coerced size: %ju\n",
1080 					    (uintmax_t)pinfo.coerced_size);
1081 				}
1082 			}
1083 		}
1084 		p += config->array_size;
1085 	}
1086 
1087 	for (i = 0; i < config->log_drv_count; i++) {
1088 		ld = (struct mfi_ld_config *)p;
1089 		printf("    volume %s ",
1090 		    mfi_volume_name(fd, ld->properties.ld.v.target_id));
1091 		printf("%s %s",
1092 		    mfi_raid_level(ld->params.primary_raid_level,
1093 			ld->params.secondary_raid_level),
1094 		    mfi_ldstate(ld->params.state));
1095 		if (ld->properties.name[0] != '\0')
1096 			printf(" <%s>", ld->properties.name);
1097 		printf("\n");
1098 		printf("      primary raid level: %u\n",
1099 		    ld->params.primary_raid_level);
1100 		printf("      raid level qualifier: %u\n",
1101 		    ld->params.raid_level_qualifier);
1102 		printf("      secondary raid level: %u\n",
1103 		    ld->params.secondary_raid_level);
1104 		printf("      stripe size: %u\n", ld->params.stripe_size);
1105 		printf("      num drives: %u\n", ld->params.num_drives);
1106 		printf("      init state: %u\n", ld->params.init_state);
1107 		printf("      consistent: %u\n", ld->params.is_consistent);
1108 		printf("      no bgi: %u\n", ld->properties.no_bgi);
1109 		printf("      spans:\n");
1110 		for (j = 0; j < ld->params.span_depth; j++) {
1111 			printf("        array %u @ ", ld->span[j].array_ref);
1112 			printf("%ju : %ju\n",
1113 			    (uintmax_t)ld->span[j].start_block,
1114 			    (uintmax_t)ld->span[j].num_blocks);
1115 		}
1116 		p += config->log_drv_size;
1117 	}
1118 
1119 	for (i = 0; i < config->spares_count; i++) {
1120 		sp = (struct mfi_spare *)p;
1121 		printf("    %s spare %u ",
1122 		    sp->spare_type & MFI_SPARE_DEDICATED ? "dedicated" :
1123 		    "global", sp->ref.v.device_id);
1124 		printf("%s", mfi_pdstate(MFI_PD_STATE_HOT_SPARE));
1125 		printf(" backs:\n");
1126 		for (j = 0; j < sp->array_count; j++)
1127 			printf("        array %u\n", sp->array_ref[j]);
1128 		p += config->spares_size;
1129 	}
1130 }
1131 
1132 static int
1133 debug_config(int ac, char **av)
1134 {
1135 	struct mfi_config_data *config;
1136 	int error, fd;
1137 
1138 	if (ac != 1) {
1139 		warnx("debug: extra arguments");
1140 		return (EINVAL);
1141 	}
1142 
1143 	fd = mfi_open(mfi_unit);
1144 	if (fd < 0) {
1145 		error = errno;
1146 		warn("mfi_open");
1147 		return (error);
1148 	}
1149 
1150 	/* Get the config from the controller. */
1151 	if (mfi_config_read(fd, &config) < 0) {
1152 		error = errno;
1153 		warn("Failed to get config");
1154 		return (error);
1155 	}
1156 
1157 	/* Dump out the configuration. */
1158 	dump_config(fd, config);
1159 	free(config);
1160 	close(fd);
1161 
1162 	return (0);
1163 }
1164 MFI_COMMAND(top, debug, debug_config);
1165 
1166 static int
1167 dump(int ac, char **av)
1168 {
1169 	struct mfi_config_data *config;
1170 	char buf[64];
1171 	size_t len;
1172 	int error, fd;
1173 
1174 	if (ac != 1) {
1175 		warnx("dump: extra arguments");
1176 		return (EINVAL);
1177 	}
1178 
1179 	fd = mfi_open(mfi_unit);
1180 	if (fd < 0) {
1181 		error = errno;
1182 		warn("mfi_open");
1183 		return (error);
1184 	}
1185 
1186 	/* Get the stashed copy of the last dcmd from the driver. */
1187 	snprintf(buf, sizeof(buf), "dev.mfi.%d.debug_command", mfi_unit);
1188 	if (sysctlbyname(buf, NULL, &len, NULL, 0) < 0) {
1189 		error = errno;
1190 		warn("Failed to read debug command");
1191 		if (error == ENOENT)
1192 			error = EOPNOTSUPP;
1193 		return (error);
1194 	}
1195 
1196 	config = malloc(len);
1197 	if (config == NULL) {
1198 		warnx("malloc failed");
1199 		return (ENOMEM);
1200 	}
1201 	if (sysctlbyname(buf, config, &len, NULL, 0) < 0) {
1202 		error = errno;
1203 		warn("Failed to read debug command");
1204 		return (error);
1205 	}
1206 	dump_config(fd, config);
1207 	free(config);
1208 	close(fd);
1209 
1210 	return (0);
1211 }
1212 MFI_COMMAND(top, dump, dump);
1213 #endif
1214